/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "dwarf2out.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)

enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
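/* Illustrative note (not in the original source): a vzeroupper emitted for
   a call site carries one of the call_avx256_state values above as the
   operand of its UNSPEC_VOLATILE pattern; move_or_delete_vzeroupper_2
   below reads it back with

     avx256 = INTVAL (XVECEXP (pat, 0, 0));

   and uses it to decide whether that particular vzeroupper is redundant.  */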
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
	  && REG_P (SET_SRC (set))
	  && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
	= (enum upper_128bits_state *) data;
      *state = used;
    }
}
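/* Illustrative note (not in the original source): this callback is driven
   by note_stores, which invokes it once per store in an insn pattern, as
   move_or_delete_vzeroupper_2 does below:

     note_stores (pat, check_avx256_stores, &state);

   Any store whose destination or source is a 256bit AVX register flips
   *STATE to "used".  */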
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
			     enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
		 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
		 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
	     bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
	{
	  if (!vzeroupper_insn)
	    continue;

	  if (PREV_INSN (insn) != vzeroupper_insn)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Move vzeroupper after:\n");
		  print_rtl_single (dump_file, PREV_INSN (insn));
		  fprintf (dump_file, "before:\n");
		  print_rtl_single (dump_file, insn);
		}
	      reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
				  PREV_INSN (insn));
	    }
	  vzeroupper_insn = NULL_RTX;
	  continue;
	}

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
	  && XINT (pat, 1) == UNSPECV_VZEROUPPER)
	{
	  if (dump_file)
	    {
	      /* Found vzeroupper intrinsic.  */
	      fprintf (dump_file, "Found vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	}
      else
	{
	  /* Check insn for vzeroall intrinsic.  */
	  if (GET_CODE (pat) == PARALLEL
	      && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
	      && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
	    {
	      state = unused;
	      unchanged = false;

	      /* Delete pending vzeroupper insertion.  */
	      if (vzeroupper_insn)
		{
		  delete_insn (vzeroupper_insn);
		  vzeroupper_insn = NULL_RTX;
		}
	    }
	  else if (state != used)
	    {
	      note_stores (pat, check_avx256_stores, &state);
	      if (state == used)
		unchanged = false;
	    }
	  continue;
	}

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
	{
	  /* Since the upper 128bits are cleared, callee must not pass
	     256bit AVX register.  We only need to check if callee
	     returns 256bit AVX register.  */
	  if (avx256 == callee_return_avx256)
	    {
	      state = used;
	      unchanged = false;
	    }

	  /* Remove unnecessary vzeroupper since upper 128bits are
	     cleared.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	  delete_insn (insn);
	}
      else
	{
	  /* Set state to UNUSED if callee doesn't return 256bit AVX
	     register.  */
	  if (avx256 != callee_return_pass_avx256)
	    state = unused;

	  if (avx256 == callee_return_pass_avx256
	      || avx256 == callee_pass_avx256)
	    {
	      /* Must remove vzeroupper since callee passes in 256bit
		 AVX register.  */
	      if (dump_file)
		{
		  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
		  print_rtl_single (dump_file, insn);
		}
	      delete_insn (insn);
	    }
	  else
	    {
	      vzeroupper_insn = insn;
	      unchanged = false;
	    }
	}
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
	     bb->index, unchanged ? "unchanged" : "changed",
	     state);
}
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
	continue;
      switch (BLOCK_INFO (e->src)->state)
	{
	case unknown:
	  if (!unknown_is_unused)
	    seen_unknown = true;
	case unused:
	  break;
	case used:
	  state = used;
	  goto done;
	}
    }

  if (seen_unknown)
    state = unknown;

 done:
  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
	cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
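/* Illustrative sketch (not part of the original source): the predecessor
   scan above implements a meet over the three-value lattice.  The
   standalone helper below, whose name is invented for this illustration,
   states the same rule directly: USED dominates everything, UNKNOWN
   dominates UNUSED unless unknown edges are being treated as unused, and
   UNUSED is the neutral element.  */

static enum upper_128bits_state
meet_upper_128bits_state (enum upper_128bits_state a,
			  enum upper_128bits_state b,
			  bool unknown_is_unused)
{
  if (a == used || b == used)
    return used;
  if (!unknown_is_unused && (a == unknown || b == unknown))
    return unknown;
  return unused;
}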
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
				   cfun->machine->caller_pass_avx256_p
				   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
	move_or_delete_vzeroupper_1 (bb, false);
	fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
	{
	  bb = (basic_block) fibheap_extract_min (worklist);
	  RESET_BIT (in_worklist, bb->index);
	  gcc_assert (!TEST_BIT (visited, bb->index));
	  if (!TEST_BIT (visited, bb->index))
	    {
	      edge_iterator ei;

	      SET_BIT (visited, bb->index);

	      if (move_or_delete_vzeroupper_1 (bb, false))
		FOR_EACH_EDGE (e, ei, bb->succs)
		  {
		    if (e->dest == EXIT_BLOCK_PTR
			|| BLOCK_INFO (e->dest)->processed)
		      continue;

		    if (TEST_BIT (visited, e->dest->index))
		      {
			if (!TEST_BIT (in_pending, e->dest->index))
			  {
			    /* Send E->DEST to next round.  */
			    SET_BIT (in_pending, e->dest->index);
			    fibheap_insert (pending,
					    bb_order[e->dest->index],
					    e->dest);
			  }
		      }
		    else if (!TEST_BIT (in_worklist, e->dest->index))
		      {
			/* Add E->DEST to current round.  */
			SET_BIT (in_worklist, e->dest->index);
			fibheap_insert (worklist, bb_order[e->dest->index],
					e->dest);
		      }
		  }
	    }
	}

      if (!cfun->machine->rescan_vzeroupper_p)
	break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
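/* Illustrative sketch (not part of the original source): stripped of the
   fibonacci heaps, bitmaps and rc_order priorities, the driver above is a
   round-based fixed-point iteration.  The helper below, with invented
   names, shows that shape: sweep every block and repeat whole rounds until
   no block reports a changed exit state.  The real pass additionally
   visits blocks in reverse completion order and only requeues successors
   of blocks whose state changed, which converges much faster.  */

static void
fixpoint_rounds_sketch (int n_blocks, bool (*process_block) (int))
{
  bool changed = true;

  while (changed)
    {
      int i;

      changed = false;
      for (i = 0; i < n_blocks; i++)
	if (process_block (i))
	  changed = true;
    }
}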
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
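/* Illustrative note (not in the original source): the multiply and divide
   cost arrays in the processor_costs tables below are indexed by
   MODE_INDEX, so a cost query for, say, an SImode multiply by a constant
   looks roughly like

     cost->mult_init[MODE_INDEX (SImode)] + nbits * cost->mult_bit;

   where NBITS counts the set bits of the constant.  Treat the field names
   and the expression as an approximation of what ix86_rtx_costs computes,
   not a quotation of it.  */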
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
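/* Illustrative note (not in the original source): with COSTS_N_INSNS (N)
   assumed to expand to (N) * 4 and an add taking 2 bytes,
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so a plain add scores the
   same whether a table measures size (bytes) or speed (instructions),
   which keeps the -Os table below comparable with the per-CPU tables.  */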
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
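/* Illustrative sketch (not part of the original source): a stringop_algs
   entry such as DUMMY_STRINGOP_ALGS or the per-CPU tables below pairs a
   fallback algorithm for unknown sizes with {max, alg} records, where a
   max of -1 means "no upper limit".  The helper below, whose name is
   invented here, shows how such a table would be consulted; the real
   selection logic (decide_alg) also folds in -minline-all-stringops and
   the various inlining flags.  */

static enum stringop_alg
pick_stringop_alg_sketch (const struct stringop_algs *algs,
			  HOST_WIDE_INT count)
{
  int i;

  /* Size not known at compile time: use the catch-all algorithm.  */
  if (count < 0)
    return algs->unknown_size;

  /* Otherwise take the first record whose MAX covers COUNT.  */
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1 || count <= algs->size[i].max)
      return algs->size[i].alg;

  return libcall;
}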
511 const
512 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
513 COSTS_N_BYTES (2), /* cost of an add instruction */
514 COSTS_N_BYTES (3), /* cost of a lea instruction */
515 COSTS_N_BYTES (2), /* variable shift costs */
516 COSTS_N_BYTES (3), /* constant shift costs */
517 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
518 COSTS_N_BYTES (3), /* HI */
519 COSTS_N_BYTES (3), /* SI */
520 COSTS_N_BYTES (3), /* DI */
521 COSTS_N_BYTES (5)}, /* other */
522 0, /* cost of multiply per each bit set */
523 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
524 COSTS_N_BYTES (3), /* HI */
525 COSTS_N_BYTES (3), /* SI */
526 COSTS_N_BYTES (3), /* DI */
527 COSTS_N_BYTES (5)}, /* other */
528 COSTS_N_BYTES (3), /* cost of movsx */
529 COSTS_N_BYTES (3), /* cost of movzx */
530 0, /* "large" insn */
531 2, /* MOVE_RATIO */
532 2, /* cost for loading QImode using movzbl */
533 {2, 2, 2}, /* cost of loading integer registers
534 in QImode, HImode and SImode.
535 Relative to reg-reg move (2). */
536 {2, 2, 2}, /* cost of storing integer registers */
537 2, /* cost of reg,reg fld/fst */
538 {2, 2, 2}, /* cost of loading fp registers
539 in SFmode, DFmode and XFmode */
540 {2, 2, 2}, /* cost of storing fp registers
541 in SFmode, DFmode and XFmode */
542 3, /* cost of moving MMX register */
543 {3, 3}, /* cost of loading MMX registers
544 in SImode and DImode */
545 {3, 3}, /* cost of storing MMX registers
546 in SImode and DImode */
547 3, /* cost of moving SSE register */
548 {3, 3, 3}, /* cost of loading SSE registers
549 in SImode, DImode and TImode */
550 {3, 3, 3}, /* cost of storing SSE registers
551 in SImode, DImode and TImode */
552 3, /* MMX or SSE register to integer */
553 0, /* size of l1 cache */
554 0, /* size of l2 cache */
555 0, /* size of prefetch block */
556 0, /* number of parallel prefetches */
557 2, /* Branch cost */
558 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
559 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
560 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
561 COSTS_N_BYTES (2), /* cost of FABS instruction. */
562 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
563 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
564 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
565 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
566 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
567 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
568 1, /* scalar_stmt_cost. */
569 1, /* scalar load_cost. */
570 1, /* scalar_store_cost. */
571 1, /* vec_stmt_cost. */
572 1, /* vec_to_scalar_cost. */
573 1, /* scalar_to_vec_cost. */
574 1, /* vec_align_load_cost. */
575 1, /* vec_unalign_load_cost. */
576 1, /* vec_store_cost. */
577 1, /* cond_taken_branch_cost. */
578 1, /* cond_not_taken_branch_cost. */
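/* Illustrative note (not in the original source): ix86_size_cost above is
   the table consulted when code is being optimized for size; the per-CPU
   speed tables follow.  The choice is made per query, roughly

     const struct processor_costs *cost
       = speed ? ix86_cost : &ix86_size_cost;

   as in ix86_rtx_costs, where SPEED says whether the current function is
   optimized for speed.  Treat the snippet as a paraphrase, not a
   quotation.  */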
581 /* Processor costs (relative to an add) */
582 static const
583 struct processor_costs i386_cost = { /* 386 specific costs */
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (1), /* cost of a lea instruction */
586 COSTS_N_INSNS (3), /* variable shift costs */
587 COSTS_N_INSNS (2), /* constant shift costs */
588 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (6), /* HI */
590 COSTS_N_INSNS (6), /* SI */
591 COSTS_N_INSNS (6), /* DI */
592 COSTS_N_INSNS (6)}, /* other */
593 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (23), /* HI */
596 COSTS_N_INSNS (23), /* SI */
597 COSTS_N_INSNS (23), /* DI */
598 COSTS_N_INSNS (23)}, /* other */
599 COSTS_N_INSNS (3), /* cost of movsx */
600 COSTS_N_INSNS (2), /* cost of movzx */
601 15, /* "large" insn */
602 3, /* MOVE_RATIO */
603 4, /* cost for loading QImode using movzbl */
604 {2, 4, 2}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {2, 4, 2}, /* cost of storing integer registers */
608 2, /* cost of reg,reg fld/fst */
609 {8, 8, 8}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {8, 8, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 8}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 8}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 8, 16}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 8, 16}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 3, /* MMX or SSE register to integer */
624 0, /* size of l1 cache */
625 0, /* size of l2 cache */
626 0, /* size of prefetch block */
627 0, /* number of parallel prefetches */
628 1, /* Branch cost */
629 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (22), /* cost of FABS instruction. */
633 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
635 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
636 DUMMY_STRINGOP_ALGS},
637 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
638 DUMMY_STRINGOP_ALGS},
639 1, /* scalar_stmt_cost. */
640 1, /* scalar load_cost. */
641 1, /* scalar_store_cost. */
642 1, /* vec_stmt_cost. */
643 1, /* vec_to_scalar_cost. */
644 1, /* scalar_to_vec_cost. */
645 1, /* vec_align_load_cost. */
646 2, /* vec_unalign_load_cost. */
647 1, /* vec_store_cost. */
648 3, /* cond_taken_branch_cost. */
649 1, /* cond_not_taken_branch_cost. */
652 static const
653 struct processor_costs i486_cost = { /* 486 specific costs */
654 COSTS_N_INSNS (1), /* cost of an add instruction */
655 COSTS_N_INSNS (1), /* cost of a lea instruction */
656 COSTS_N_INSNS (3), /* variable shift costs */
657 COSTS_N_INSNS (2), /* constant shift costs */
658 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
659 COSTS_N_INSNS (12), /* HI */
660 COSTS_N_INSNS (12), /* SI */
661 COSTS_N_INSNS (12), /* DI */
662 COSTS_N_INSNS (12)}, /* other */
663 1, /* cost of multiply per each bit set */
664 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
665 COSTS_N_INSNS (40), /* HI */
666 COSTS_N_INSNS (40), /* SI */
667 COSTS_N_INSNS (40), /* DI */
668 COSTS_N_INSNS (40)}, /* other */
669 COSTS_N_INSNS (3), /* cost of movsx */
670 COSTS_N_INSNS (2), /* cost of movzx */
671 15, /* "large" insn */
672 3, /* MOVE_RATIO */
673 4, /* cost for loading QImode using movzbl */
674 {2, 4, 2}, /* cost of loading integer registers
675 in QImode, HImode and SImode.
676 Relative to reg-reg move (2). */
677 {2, 4, 2}, /* cost of storing integer registers */
678 2, /* cost of reg,reg fld/fst */
679 {8, 8, 8}, /* cost of loading fp registers
680 in SFmode, DFmode and XFmode */
681 {8, 8, 8}, /* cost of storing fp registers
682 in SFmode, DFmode and XFmode */
683 2, /* cost of moving MMX register */
684 {4, 8}, /* cost of loading MMX registers
685 in SImode and DImode */
686 {4, 8}, /* cost of storing MMX registers
687 in SImode and DImode */
688 2, /* cost of moving SSE register */
689 {4, 8, 16}, /* cost of loading SSE registers
690 in SImode, DImode and TImode */
691 {4, 8, 16}, /* cost of storing SSE registers
692 in SImode, DImode and TImode */
693 3, /* MMX or SSE register to integer */
694 4, /* size of l1 cache. 486 has 8kB cache
695 shared for code and data, so 4kB is
696 not really precise. */
697 4, /* size of l2 cache */
698 0, /* size of prefetch block */
699 0, /* number of parallel prefetches */
700 1, /* Branch cost */
701 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
702 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
703 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
704 COSTS_N_INSNS (3), /* cost of FABS instruction. */
705 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
706 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
707 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
708 DUMMY_STRINGOP_ALGS},
709 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
710 DUMMY_STRINGOP_ALGS},
711 1, /* scalar_stmt_cost. */
712 1, /* scalar load_cost. */
713 1, /* scalar_store_cost. */
714 1, /* vec_stmt_cost. */
715 1, /* vec_to_scalar_cost. */
716 1, /* scalar_to_vec_cost. */
717 1, /* vec_align_load_cost. */
718 2, /* vec_unalign_load_cost. */
719 1, /* vec_store_cost. */
720 3, /* cond_taken_branch_cost. */
721 1, /* cond_not_taken_branch_cost. */
724 static const
725 struct processor_costs pentium_cost = {
726 COSTS_N_INSNS (1), /* cost of an add instruction */
727 COSTS_N_INSNS (1), /* cost of a lea instruction */
728 COSTS_N_INSNS (4), /* variable shift costs */
729 COSTS_N_INSNS (1), /* constant shift costs */
730 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
731 COSTS_N_INSNS (11), /* HI */
732 COSTS_N_INSNS (11), /* SI */
733 COSTS_N_INSNS (11), /* DI */
734 COSTS_N_INSNS (11)}, /* other */
735 0, /* cost of multiply per each bit set */
736 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
737 COSTS_N_INSNS (25), /* HI */
738 COSTS_N_INSNS (25), /* SI */
739 COSTS_N_INSNS (25), /* DI */
740 COSTS_N_INSNS (25)}, /* other */
741 COSTS_N_INSNS (3), /* cost of movsx */
742 COSTS_N_INSNS (2), /* cost of movzx */
743 8, /* "large" insn */
744 6, /* MOVE_RATIO */
745 6, /* cost for loading QImode using movzbl */
746 {2, 4, 2}, /* cost of loading integer registers
747 in QImode, HImode and SImode.
748 Relative to reg-reg move (2). */
749 {2, 4, 2}, /* cost of storing integer registers */
750 2, /* cost of reg,reg fld/fst */
751 {2, 2, 6}, /* cost of loading fp registers
752 in SFmode, DFmode and XFmode */
753 {4, 4, 6}, /* cost of storing fp registers
754 in SFmode, DFmode and XFmode */
755 8, /* cost of moving MMX register */
756 {8, 8}, /* cost of loading MMX registers
757 in SImode and DImode */
758 {8, 8}, /* cost of storing MMX registers
759 in SImode and DImode */
760 2, /* cost of moving SSE register */
761 {4, 8, 16}, /* cost of loading SSE registers
762 in SImode, DImode and TImode */
763 {4, 8, 16}, /* cost of storing SSE registers
764 in SImode, DImode and TImode */
765 3, /* MMX or SSE register to integer */
766 8, /* size of l1 cache. */
767 8, /* size of l2 cache */
768 0, /* size of prefetch block */
769 0, /* number of parallel prefetches */
770 2, /* Branch cost */
771 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
772 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
773 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
774 COSTS_N_INSNS (1), /* cost of FABS instruction. */
775 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
776 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
777 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
778 DUMMY_STRINGOP_ALGS},
779 {{libcall, {{-1, rep_prefix_4_byte}}},
780 DUMMY_STRINGOP_ALGS},
781 1, /* scalar_stmt_cost. */
782 1, /* scalar load_cost. */
783 1, /* scalar_store_cost. */
784 1, /* vec_stmt_cost. */
785 1, /* vec_to_scalar_cost. */
786 1, /* scalar_to_vec_cost. */
787 1, /* vec_align_load_cost. */
788 2, /* vec_unalign_load_cost. */
789 1, /* vec_store_cost. */
790 3, /* cond_taken_branch_cost. */
791 1, /* cond_not_taken_branch_cost. */
794 static const
795 struct processor_costs pentiumpro_cost = {
796 COSTS_N_INSNS (1), /* cost of an add instruction */
797 COSTS_N_INSNS (1), /* cost of a lea instruction */
798 COSTS_N_INSNS (1), /* variable shift costs */
799 COSTS_N_INSNS (1), /* constant shift costs */
800 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
801 COSTS_N_INSNS (4), /* HI */
802 COSTS_N_INSNS (4), /* SI */
803 COSTS_N_INSNS (4), /* DI */
804 COSTS_N_INSNS (4)}, /* other */
805 0, /* cost of multiply per each bit set */
806 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
807 COSTS_N_INSNS (17), /* HI */
808 COSTS_N_INSNS (17), /* SI */
809 COSTS_N_INSNS (17), /* DI */
810 COSTS_N_INSNS (17)}, /* other */
811 COSTS_N_INSNS (1), /* cost of movsx */
812 COSTS_N_INSNS (1), /* cost of movzx */
813 8, /* "large" insn */
814 6, /* MOVE_RATIO */
815 2, /* cost for loading QImode using movzbl */
816 {4, 4, 4}, /* cost of loading integer registers
817 in QImode, HImode and SImode.
818 Relative to reg-reg move (2). */
819 {2, 2, 2}, /* cost of storing integer registers */
820 2, /* cost of reg,reg fld/fst */
821 {2, 2, 6}, /* cost of loading fp registers
822 in SFmode, DFmode and XFmode */
823 {4, 4, 6}, /* cost of storing fp registers
824 in SFmode, DFmode and XFmode */
825 2, /* cost of moving MMX register */
826 {2, 2}, /* cost of loading MMX registers
827 in SImode and DImode */
828 {2, 2}, /* cost of storing MMX registers
829 in SImode and DImode */
830 2, /* cost of moving SSE register */
831 {2, 2, 8}, /* cost of loading SSE registers
832 in SImode, DImode and TImode */
833 {2, 2, 8}, /* cost of storing SSE registers
834 in SImode, DImode and TImode */
835 3, /* MMX or SSE register to integer */
836 8, /* size of l1 cache. */
837 256, /* size of l2 cache */
838 32, /* size of prefetch block */
839 6, /* number of parallel prefetches */
840 2, /* Branch cost */
841 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
842 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
843 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
844 COSTS_N_INSNS (2), /* cost of FABS instruction. */
845 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
846 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
847 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
848 (we ensure the alignment). For small blocks inline loop is still a
849 noticeable win, for bigger blocks either rep movsl or rep movsb is
850 way to go. Rep movsb has apparently more expensive startup time in CPU,
851 but after 4K the difference is down in the noise. */
852 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
853 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
854 DUMMY_STRINGOP_ALGS},
855 {{rep_prefix_4_byte, {{1024, unrolled_loop},
856 {8192, rep_prefix_4_byte}, {-1, libcall}}},
857 DUMMY_STRINGOP_ALGS},
858 1, /* scalar_stmt_cost. */
859 1, /* scalar load_cost. */
860 1, /* scalar_store_cost. */
861 1, /* vec_stmt_cost. */
862 1, /* vec_to_scalar_cost. */
863 1, /* scalar_to_vec_cost. */
864 1, /* vec_align_load_cost. */
865 2, /* vec_unalign_load_cost. */
866 1, /* vec_store_cost. */
867 3, /* cond_taken_branch_cost. */
868 1, /* cond_not_taken_branch_cost. */
871 static const
872 struct processor_costs geode_cost = {
873 COSTS_N_INSNS (1), /* cost of an add instruction */
874 COSTS_N_INSNS (1), /* cost of a lea instruction */
875 COSTS_N_INSNS (2), /* variable shift costs */
876 COSTS_N_INSNS (1), /* constant shift costs */
877 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
878 COSTS_N_INSNS (4), /* HI */
879 COSTS_N_INSNS (7), /* SI */
880 COSTS_N_INSNS (7), /* DI */
881 COSTS_N_INSNS (7)}, /* other */
882 0, /* cost of multiply per each bit set */
883 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
884 COSTS_N_INSNS (23), /* HI */
885 COSTS_N_INSNS (39), /* SI */
886 COSTS_N_INSNS (39), /* DI */
887 COSTS_N_INSNS (39)}, /* other */
888 COSTS_N_INSNS (1), /* cost of movsx */
889 COSTS_N_INSNS (1), /* cost of movzx */
890 8, /* "large" insn */
891 4, /* MOVE_RATIO */
892 1, /* cost for loading QImode using movzbl */
893 {1, 1, 1}, /* cost of loading integer registers
894 in QImode, HImode and SImode.
895 Relative to reg-reg move (2). */
896 {1, 1, 1}, /* cost of storing integer registers */
897 1, /* cost of reg,reg fld/fst */
898 {1, 1, 1}, /* cost of loading fp registers
899 in SFmode, DFmode and XFmode */
900 {4, 6, 6}, /* cost of storing fp registers
901 in SFmode, DFmode and XFmode */
903 1, /* cost of moving MMX register */
904 {1, 1}, /* cost of loading MMX registers
905 in SImode and DImode */
906 {1, 1}, /* cost of storing MMX registers
907 in SImode and DImode */
908 1, /* cost of moving SSE register */
909 {1, 1, 1}, /* cost of loading SSE registers
910 in SImode, DImode and TImode */
911 {1, 1, 1}, /* cost of storing SSE registers
912 in SImode, DImode and TImode */
913 1, /* MMX or SSE register to integer */
914 64, /* size of l1 cache. */
915 128, /* size of l2 cache. */
916 32, /* size of prefetch block */
917 1, /* number of parallel prefetches */
918 1, /* Branch cost */
919 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
920 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
921 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
922 COSTS_N_INSNS (1), /* cost of FABS instruction. */
923 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
924 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
925 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
926 DUMMY_STRINGOP_ALGS},
927 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
928 DUMMY_STRINGOP_ALGS},
929 1, /* scalar_stmt_cost. */
930 1, /* scalar load_cost. */
931 1, /* scalar_store_cost. */
932 1, /* vec_stmt_cost. */
933 1, /* vec_to_scalar_cost. */
934 1, /* scalar_to_vec_cost. */
935 1, /* vec_align_load_cost. */
936 2, /* vec_unalign_load_cost. */
937 1, /* vec_store_cost. */
938 3, /* cond_taken_branch_cost. */
939 1, /* cond_not_taken_branch_cost. */
942 static const
943 struct processor_costs k6_cost = {
944 COSTS_N_INSNS (1), /* cost of an add instruction */
945 COSTS_N_INSNS (2), /* cost of a lea instruction */
946 COSTS_N_INSNS (1), /* variable shift costs */
947 COSTS_N_INSNS (1), /* constant shift costs */
948 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
949 COSTS_N_INSNS (3), /* HI */
950 COSTS_N_INSNS (3), /* SI */
951 COSTS_N_INSNS (3), /* DI */
952 COSTS_N_INSNS (3)}, /* other */
953 0, /* cost of multiply per each bit set */
954 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
955 COSTS_N_INSNS (18), /* HI */
956 COSTS_N_INSNS (18), /* SI */
957 COSTS_N_INSNS (18), /* DI */
958 COSTS_N_INSNS (18)}, /* other */
959 COSTS_N_INSNS (2), /* cost of movsx */
960 COSTS_N_INSNS (2), /* cost of movzx */
961 8, /* "large" insn */
962 4, /* MOVE_RATIO */
963 3, /* cost for loading QImode using movzbl */
964 {4, 5, 4}, /* cost of loading integer registers
965 in QImode, HImode and SImode.
966 Relative to reg-reg move (2). */
967 {2, 3, 2}, /* cost of storing integer registers */
968 4, /* cost of reg,reg fld/fst */
969 {6, 6, 6}, /* cost of loading fp registers
970 in SFmode, DFmode and XFmode */
971 {4, 4, 4}, /* cost of storing fp registers
972 in SFmode, DFmode and XFmode */
973 2, /* cost of moving MMX register */
974 {2, 2}, /* cost of loading MMX registers
975 in SImode and DImode */
976 {2, 2}, /* cost of storing MMX registers
977 in SImode and DImode */
978 2, /* cost of moving SSE register */
979 {2, 2, 8}, /* cost of loading SSE registers
980 in SImode, DImode and TImode */
981 {2, 2, 8}, /* cost of storing SSE registers
982 in SImode, DImode and TImode */
983 6, /* MMX or SSE register to integer */
984 32, /* size of l1 cache. */
985 32, /* size of l2 cache. Some models
986 have integrated l2 cache, but
987 optimizing for k6 is not important
988 enough to worry about that. */
989 32, /* size of prefetch block */
990 1, /* number of parallel prefetches */
991 1, /* Branch cost */
992 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
993 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
994 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
995 COSTS_N_INSNS (2), /* cost of FABS instruction. */
996 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
997 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
998 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
999 DUMMY_STRINGOP_ALGS},
1000 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
1001 DUMMY_STRINGOP_ALGS},
1002 1, /* scalar_stmt_cost. */
1003 1, /* scalar load_cost. */
1004 1, /* scalar_store_cost. */
1005 1, /* vec_stmt_cost. */
1006 1, /* vec_to_scalar_cost. */
1007 1, /* scalar_to_vec_cost. */
1008 1, /* vec_align_load_cost. */
1009 2, /* vec_unalign_load_cost. */
1010 1, /* vec_store_cost. */
1011 3, /* cond_taken_branch_cost. */
1012 1, /* cond_not_taken_branch_cost. */
1015 static const
1016 struct processor_costs athlon_cost = {
1017 COSTS_N_INSNS (1), /* cost of an add instruction */
1018 COSTS_N_INSNS (2), /* cost of a lea instruction */
1019 COSTS_N_INSNS (1), /* variable shift costs */
1020 COSTS_N_INSNS (1), /* constant shift costs */
1021 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
1022 COSTS_N_INSNS (5), /* HI */
1023 COSTS_N_INSNS (5), /* SI */
1024 COSTS_N_INSNS (5), /* DI */
1025 COSTS_N_INSNS (5)}, /* other */
1026 0, /* cost of multiply per each bit set */
1027 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1028 COSTS_N_INSNS (26), /* HI */
1029 COSTS_N_INSNS (42), /* SI */
1030 COSTS_N_INSNS (74), /* DI */
1031 COSTS_N_INSNS (74)}, /* other */
1032 COSTS_N_INSNS (1), /* cost of movsx */
1033 COSTS_N_INSNS (1), /* cost of movzx */
1034 8, /* "large" insn */
1035 9, /* MOVE_RATIO */
1036 4, /* cost for loading QImode using movzbl */
1037 {3, 4, 3}, /* cost of loading integer registers
1038 in QImode, HImode and SImode.
1039 Relative to reg-reg move (2). */
1040 {3, 4, 3}, /* cost of storing integer registers */
1041 4, /* cost of reg,reg fld/fst */
1042 {4, 4, 12}, /* cost of loading fp registers
1043 in SFmode, DFmode and XFmode */
1044 {6, 6, 8}, /* cost of storing fp registers
1045 in SFmode, DFmode and XFmode */
1046 2, /* cost of moving MMX register */
1047 {4, 4}, /* cost of loading MMX registers
1048 in SImode and DImode */
1049 {4, 4}, /* cost of storing MMX registers
1050 in SImode and DImode */
1051 2, /* cost of moving SSE register */
1052 {4, 4, 6}, /* cost of loading SSE registers
1053 in SImode, DImode and TImode */
1054 {4, 4, 5}, /* cost of storing SSE registers
1055 in SImode, DImode and TImode */
1056 5, /* MMX or SSE register to integer */
1057 64, /* size of l1 cache. */
1058 256, /* size of l2 cache. */
1059 64, /* size of prefetch block */
1060 6, /* number of parallel prefetches */
1061 5, /* Branch cost */
1062 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1063 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1064 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
1065 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1066 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1067 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1068 /* For some reason, Athlon deals better with REP prefix (relative to loops)
1069 compared to K8. Alignment becomes important after 8 bytes for memcpy and
1070 128 bytes for memset. */
1071 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1072 DUMMY_STRINGOP_ALGS},
1073 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1074 DUMMY_STRINGOP_ALGS},
1075 1, /* scalar_stmt_cost. */
1076 1, /* scalar load_cost. */
1077 1, /* scalar_store_cost. */
1078 1, /* vec_stmt_cost. */
1079 1, /* vec_to_scalar_cost. */
1080 1, /* scalar_to_vec_cost. */
1081 1, /* vec_align_load_cost. */
1082 2, /* vec_unalign_load_cost. */
1083 1, /* vec_store_cost. */
1084 3, /* cond_taken_branch_cost. */
1085 1, /* cond_not_taken_branch_cost. */
1088 static const
1089 struct processor_costs k8_cost = {
1090 COSTS_N_INSNS (1), /* cost of an add instruction */
1091 COSTS_N_INSNS (2), /* cost of a lea instruction */
1092 COSTS_N_INSNS (1), /* variable shift costs */
1093 COSTS_N_INSNS (1), /* constant shift costs */
1094 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1095 COSTS_N_INSNS (4), /* HI */
1096 COSTS_N_INSNS (3), /* SI */
1097 COSTS_N_INSNS (4), /* DI */
1098 COSTS_N_INSNS (5)}, /* other */
1099 0, /* cost of multiply per each bit set */
1100 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1101 COSTS_N_INSNS (26), /* HI */
1102 COSTS_N_INSNS (42), /* SI */
1103 COSTS_N_INSNS (74), /* DI */
1104 COSTS_N_INSNS (74)}, /* other */
1105 COSTS_N_INSNS (1), /* cost of movsx */
1106 COSTS_N_INSNS (1), /* cost of movzx */
1107 8, /* "large" insn */
1108 9, /* MOVE_RATIO */
1109 4, /* cost for loading QImode using movzbl */
1110 {3, 4, 3}, /* cost of loading integer registers
1111 in QImode, HImode and SImode.
1112 Relative to reg-reg move (2). */
1113 {3, 4, 3}, /* cost of storing integer registers */
1114 4, /* cost of reg,reg fld/fst */
1115 {4, 4, 12}, /* cost of loading fp registers
1116 in SFmode, DFmode and XFmode */
1117 {6, 6, 8}, /* cost of storing fp registers
1118 in SFmode, DFmode and XFmode */
1119 2, /* cost of moving MMX register */
1120 {3, 3}, /* cost of loading MMX registers
1121 in SImode and DImode */
1122 {4, 4}, /* cost of storing MMX registers
1123 in SImode and DImode */
1124 2, /* cost of moving SSE register */
1125 {4, 3, 6}, /* cost of loading SSE registers
1126 in SImode, DImode and TImode */
1127 {4, 4, 5}, /* cost of storing SSE registers
1128 in SImode, DImode and TImode */
1129 5, /* MMX or SSE register to integer */
1130 64, /* size of l1 cache. */
1131 512, /* size of l2 cache. */
1132 64, /* size of prefetch block */
1133 /* New AMD processors never drop prefetches; if they cannot be performed
1134 immediately, they are queued. We set number of simultaneous prefetches
1135 to a large constant to reflect this (it probably is not a good idea not
1136 to limit number of prefetches at all, as their execution also takes some
1137 time). */
1138 100, /* number of parallel prefetches */
1139 3, /* Branch cost */
1140 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1141 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1142 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1143 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1144 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1145 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1146 /* K8 has optimized REP instruction for medium sized blocks, but for very
1147 small blocks it is better to use loop. For large blocks, libcall can
1148 do nontemporary accesses and beat inline considerably. */
1149 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1150 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1151 {{libcall, {{8, loop}, {24, unrolled_loop},
1152 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1153 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1154 4, /* scalar_stmt_cost. */
1155 2, /* scalar load_cost. */
1156 2, /* scalar_store_cost. */
1157 5, /* vec_stmt_cost. */
1158 0, /* vec_to_scalar_cost. */
1159 2, /* scalar_to_vec_cost. */
1160 2, /* vec_align_load_cost. */
1161 3, /* vec_unalign_load_cost. */
1162 3, /* vec_store_cost. */
1163 3, /* cond_taken_branch_cost. */
1164 2, /* cond_not_taken_branch_cost. */
1167 struct processor_costs amdfam10_cost = {
1168 COSTS_N_INSNS (1), /* cost of an add instruction */
1169 COSTS_N_INSNS (2), /* cost of a lea instruction */
1170 COSTS_N_INSNS (1), /* variable shift costs */
1171 COSTS_N_INSNS (1), /* constant shift costs */
1172 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1173 COSTS_N_INSNS (4), /* HI */
1174 COSTS_N_INSNS (3), /* SI */
1175 COSTS_N_INSNS (4), /* DI */
1176 COSTS_N_INSNS (5)}, /* other */
1177 0, /* cost of multiply per each bit set */
1178 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1179 COSTS_N_INSNS (35), /* HI */
1180 COSTS_N_INSNS (51), /* SI */
1181 COSTS_N_INSNS (83), /* DI */
1182 COSTS_N_INSNS (83)}, /* other */
1183 COSTS_N_INSNS (1), /* cost of movsx */
1184 COSTS_N_INSNS (1), /* cost of movzx */
1185 8, /* "large" insn */
1186 9, /* MOVE_RATIO */
1187 4, /* cost for loading QImode using movzbl */
1188 {3, 4, 3}, /* cost of loading integer registers
1189 in QImode, HImode and SImode.
1190 Relative to reg-reg move (2). */
1191 {3, 4, 3}, /* cost of storing integer registers */
1192 4, /* cost of reg,reg fld/fst */
1193 {4, 4, 12}, /* cost of loading fp registers
1194 in SFmode, DFmode and XFmode */
1195 {6, 6, 8}, /* cost of storing fp registers
1196 in SFmode, DFmode and XFmode */
1197 2, /* cost of moving MMX register */
1198 {3, 3}, /* cost of loading MMX registers
1199 in SImode and DImode */
1200 {4, 4}, /* cost of storing MMX registers
1201 in SImode and DImode */
1202 2, /* cost of moving SSE register */
1203 {4, 4, 3}, /* cost of loading SSE registers
1204 in SImode, DImode and TImode */
1205 {4, 4, 5}, /* cost of storing SSE registers
1206 in SImode, DImode and TImode */
1207 3, /* MMX or SSE register to integer */
1208 /* On K8:
1209 MOVD reg64, xmmreg Double FSTORE 4
1210 MOVD reg32, xmmreg Double FSTORE 4
1211 On AMDFAM10:
1212 MOVD reg64, xmmreg Double FADD 3
1213 1/1 1/1
1214 MOVD reg32, xmmreg Double FADD 3
1215 1/1 1/1 */
1216 64, /* size of l1 cache. */
1217 512, /* size of l2 cache. */
1218 64, /* size of prefetch block */
1219 /* New AMD processors never drop prefetches; if they cannot be performed
1220 immediately, they are queued. We set number of simultaneous prefetches
1221 to a large constant to reflect this (it probably is not a good idea not
1222 to limit number of prefetches at all, as their execution also takes some
1223 time). */
1224 100, /* number of parallel prefetches */
1225 2, /* Branch cost */
1226 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1227 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1228 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1229 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1230 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1231 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1233 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
1234 very small blocks it is better to use loop. For large blocks, libcall can
1235 do nontemporary accesses and beat inline considerably. */
1236 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1237 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1238 {{libcall, {{8, loop}, {24, unrolled_loop},
1239 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1240 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1241 4, /* scalar_stmt_cost. */
1242 2, /* scalar load_cost. */
1243 2, /* scalar_store_cost. */
1244 6, /* vec_stmt_cost. */
1245 0, /* vec_to_scalar_cost. */
1246 2, /* scalar_to_vec_cost. */
1247 2, /* vec_align_load_cost. */
1248 2, /* vec_unalign_load_cost. */
1249 2, /* vec_store_cost. */
1250 2, /* cond_taken_branch_cost. */
1251 1, /* cond_not_taken_branch_cost. */
1254 struct processor_costs bdver1_cost = {
1255 COSTS_N_INSNS (1), /* cost of an add instruction */
1256 COSTS_N_INSNS (1), /* cost of a lea instruction */
1257 COSTS_N_INSNS (1), /* variable shift costs */
1258 COSTS_N_INSNS (1), /* constant shift costs */
1259 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1260 COSTS_N_INSNS (4), /* HI */
1261 COSTS_N_INSNS (4), /* SI */
1262 COSTS_N_INSNS (6), /* DI */
1263 COSTS_N_INSNS (6)}, /* other */
1264 0, /* cost of multiply per each bit set */
1265 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1266 COSTS_N_INSNS (35), /* HI */
1267 COSTS_N_INSNS (51), /* SI */
1268 COSTS_N_INSNS (83), /* DI */
1269 COSTS_N_INSNS (83)}, /* other */
1270 COSTS_N_INSNS (1), /* cost of movsx */
1271 COSTS_N_INSNS (1), /* cost of movzx */
1272 8, /* "large" insn */
1273 9, /* MOVE_RATIO */
1274 4, /* cost for loading QImode using movzbl */
1275 {5, 5, 4}, /* cost of loading integer registers
1276 in QImode, HImode and SImode.
1277 Relative to reg-reg move (2). */
1278 {4, 4, 4}, /* cost of storing integer registers */
1279 2, /* cost of reg,reg fld/fst */
1280 {5, 5, 12}, /* cost of loading fp registers
1281 in SFmode, DFmode and XFmode */
1282 {4, 4, 8}, /* cost of storing fp registers
1283 in SFmode, DFmode and XFmode */
1284 2, /* cost of moving MMX register */
1285 {4, 4}, /* cost of loading MMX registers
1286 in SImode and DImode */
1287 {4, 4}, /* cost of storing MMX registers
1288 in SImode and DImode */
1289 2, /* cost of moving SSE register */
1290 {4, 4, 4}, /* cost of loading SSE registers
1291 in SImode, DImode and TImode */
1292 {4, 4, 4}, /* cost of storing SSE registers
1293 in SImode, DImode and TImode */
1294 2, /* MMX or SSE register to integer */
1295 /* On K8:
1296 MOVD reg64, xmmreg Double FSTORE 4
1297 MOVD reg32, xmmreg Double FSTORE 4
1298 On AMDFAM10:
1299 MOVD reg64, xmmreg Double FADD 3
1300 1/1 1/1
1301 MOVD reg32, xmmreg Double FADD 3
1302 1/1 1/1 */
1303 16, /* size of l1 cache. */
1304 2048, /* size of l2 cache. */
1305 64, /* size of prefetch block */
1306 /* New AMD processors never drop prefetches; if they cannot be performed
1307 immediately, they are queued. We set number of simultaneous prefetches
1308 to a large constant to reflect this (it probably is not a good idea not
1309 to limit number of prefetches at all, as their execution also takes some
1310 time). */
1311 100, /* number of parallel prefetches */
1312 2, /* Branch cost */
1313 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1314 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1315 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1316 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1317 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1318 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1320 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
1321 very small blocks it is better to use loop. For large blocks, libcall
1322 can do nontemporary accesses and beat inline considerably. */
1323 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1324 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1325 {{libcall, {{8, loop}, {24, unrolled_loop},
1326 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1327 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1328 6, /* scalar_stmt_cost. */
1329 4, /* scalar load_cost. */
1330 4, /* scalar_store_cost. */
1331 6, /* vec_stmt_cost. */
1332 0, /* vec_to_scalar_cost. */
1333 2, /* scalar_to_vec_cost. */
1334 4, /* vec_align_load_cost. */
1335 4, /* vec_unalign_load_cost. */
1336 4, /* vec_store_cost. */
1337 2, /* cond_taken_branch_cost. */
1338 1, /* cond_not_taken_branch_cost. */
1341 struct processor_costs btver1_cost = {
1342 COSTS_N_INSNS (1), /* cost of an add instruction */
1343 COSTS_N_INSNS (2), /* cost of a lea instruction */
1344 COSTS_N_INSNS (1), /* variable shift costs */
1345 COSTS_N_INSNS (1), /* constant shift costs */
1346 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1347 COSTS_N_INSNS (4), /* HI */
1348 COSTS_N_INSNS (3), /* SI */
1349 COSTS_N_INSNS (4), /* DI */
1350 COSTS_N_INSNS (5)}, /* other */
1351 0, /* cost of multiply per each bit set */
1352 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1353 COSTS_N_INSNS (35), /* HI */
1354 COSTS_N_INSNS (51), /* SI */
1355 COSTS_N_INSNS (83), /* DI */
1356 COSTS_N_INSNS (83)}, /* other */
1357 COSTS_N_INSNS (1), /* cost of movsx */
1358 COSTS_N_INSNS (1), /* cost of movzx */
1359 8, /* "large" insn */
1360 9, /* MOVE_RATIO */
1361 4, /* cost for loading QImode using movzbl */
1362 {3, 4, 3}, /* cost of loading integer registers
1363 in QImode, HImode and SImode.
1364 Relative to reg-reg move (2). */
1365 {3, 4, 3}, /* cost of storing integer registers */
1366 4, /* cost of reg,reg fld/fst */
1367 {4, 4, 12}, /* cost of loading fp registers
1368 in SFmode, DFmode and XFmode */
1369 {6, 6, 8}, /* cost of storing fp registers
1370 in SFmode, DFmode and XFmode */
1371 2, /* cost of moving MMX register */
1372 {3, 3}, /* cost of loading MMX registers
1373 in SImode and DImode */
1374 {4, 4}, /* cost of storing MMX registers
1375 in SImode and DImode */
1376 2, /* cost of moving SSE register */
1377 {4, 4, 3}, /* cost of loading SSE registers
1378 in SImode, DImode and TImode */
1379 {4, 4, 5}, /* cost of storing SSE registers
1380 in SImode, DImode and TImode */
1381 3, /* MMX or SSE register to integer */
1382 /* On K8:
1383 MOVD reg64, xmmreg Double FSTORE 4
1384 MOVD reg32, xmmreg Double FSTORE 4
1385 On AMDFAM10:
1386 MOVD reg64, xmmreg Double FADD 3
1387 1/1 1/1
1388 MOVD reg32, xmmreg Double FADD 3
1389 1/1 1/1 */
1390 32, /* size of l1 cache. */
1391 512, /* size of l2 cache. */
1392 64, /* size of prefetch block */
1393 100, /* number of parallel prefetches */
1394 2, /* Branch cost */
1395 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1396 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1397 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1398 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1399 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1400 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1402 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1403 very small blocks it is better to use loop. For large blocks, libcall can
1404 do nontemporary accesses and beat inline considerably. */
1405 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1406 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1407 {{libcall, {{8, loop}, {24, unrolled_loop},
1408 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1409 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1410 4, /* scalar_stmt_cost. */
1411 2, /* scalar load_cost. */
1412 2, /* scalar_store_cost. */
1413 6, /* vec_stmt_cost. */
1414 0, /* vec_to_scalar_cost. */
1415 2, /* scalar_to_vec_cost. */
1416 2, /* vec_align_load_cost. */
1417 2, /* vec_unalign_load_cost. */
1418 2, /* vec_store_cost. */
1419 2, /* cond_taken_branch_cost. */
1420 1, /* cond_not_taken_branch_cost. */
1423 static const
1424 struct processor_costs pentium4_cost = {
1425 COSTS_N_INSNS (1), /* cost of an add instruction */
1426 COSTS_N_INSNS (3), /* cost of a lea instruction */
1427 COSTS_N_INSNS (4), /* variable shift costs */
1428 COSTS_N_INSNS (4), /* constant shift costs */
1429 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1430 COSTS_N_INSNS (15), /* HI */
1431 COSTS_N_INSNS (15), /* SI */
1432 COSTS_N_INSNS (15), /* DI */
1433 COSTS_N_INSNS (15)}, /* other */
1434 0, /* cost of multiply per each bit set */
1435 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1436 COSTS_N_INSNS (56), /* HI */
1437 COSTS_N_INSNS (56), /* SI */
1438 COSTS_N_INSNS (56), /* DI */
1439 COSTS_N_INSNS (56)}, /* other */
1440 COSTS_N_INSNS (1), /* cost of movsx */
1441 COSTS_N_INSNS (1), /* cost of movzx */
1442 16, /* "large" insn */
1443 6, /* MOVE_RATIO */
1444 2, /* cost for loading QImode using movzbl */
1445 {4, 5, 4}, /* cost of loading integer registers
1446 in QImode, HImode and SImode.
1447 Relative to reg-reg move (2). */
1448 {2, 3, 2}, /* cost of storing integer registers */
1449 2, /* cost of reg,reg fld/fst */
1450 {2, 2, 6}, /* cost of loading fp registers
1451 in SFmode, DFmode and XFmode */
1452 {4, 4, 6}, /* cost of storing fp registers
1453 in SFmode, DFmode and XFmode */
1454 2, /* cost of moving MMX register */
1455 {2, 2}, /* cost of loading MMX registers
1456 in SImode and DImode */
1457 {2, 2}, /* cost of storing MMX registers
1458 in SImode and DImode */
1459 12, /* cost of moving SSE register */
1460 {12, 12, 12}, /* cost of loading SSE registers
1461 in SImode, DImode and TImode */
1462 {2, 2, 8}, /* cost of storing SSE registers
1463 in SImode, DImode and TImode */
1464 10, /* MMX or SSE register to integer */
1465 8, /* size of l1 cache. */
1466 256, /* size of l2 cache. */
1467 64, /* size of prefetch block */
1468 6, /* number of parallel prefetches */
1469 2, /* Branch cost */
1470 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1471 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1472 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1473 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1474 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1475 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1476 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1477 DUMMY_STRINGOP_ALGS},
1478 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1479 {-1, libcall}}},
1480 DUMMY_STRINGOP_ALGS},
1481 1, /* scalar_stmt_cost. */
1482 1, /* scalar load_cost. */
1483 1, /* scalar_store_cost. */
1484 1, /* vec_stmt_cost. */
1485 1, /* vec_to_scalar_cost. */
1486 1, /* scalar_to_vec_cost. */
1487 1, /* vec_align_load_cost. */
1488 2, /* vec_unalign_load_cost. */
1489 1, /* vec_store_cost. */
1490 3, /* cond_taken_branch_cost. */
1491 1, /* cond_not_taken_branch_cost. */
1494 static const
1495 struct processor_costs nocona_cost = {
1496 COSTS_N_INSNS (1), /* cost of an add instruction */
1497 COSTS_N_INSNS (1), /* cost of a lea instruction */
1498 COSTS_N_INSNS (1), /* variable shift costs */
1499 COSTS_N_INSNS (1), /* constant shift costs */
1500 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1501 COSTS_N_INSNS (10), /* HI */
1502 COSTS_N_INSNS (10), /* SI */
1503 COSTS_N_INSNS (10), /* DI */
1504 COSTS_N_INSNS (10)}, /* other */
1505 0, /* cost of multiply per each bit set */
1506 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1507 COSTS_N_INSNS (66), /* HI */
1508 COSTS_N_INSNS (66), /* SI */
1509 COSTS_N_INSNS (66), /* DI */
1510 COSTS_N_INSNS (66)}, /* other */
1511 COSTS_N_INSNS (1), /* cost of movsx */
1512 COSTS_N_INSNS (1), /* cost of movzx */
1513 16, /* "large" insn */
1514 17, /* MOVE_RATIO */
1515 4, /* cost for loading QImode using movzbl */
1516 {4, 4, 4}, /* cost of loading integer registers
1517 in QImode, HImode and SImode.
1518 Relative to reg-reg move (2). */
1519 {4, 4, 4}, /* cost of storing integer registers */
1520 3, /* cost of reg,reg fld/fst */
1521 {12, 12, 12}, /* cost of loading fp registers
1522 in SFmode, DFmode and XFmode */
1523 {4, 4, 4}, /* cost of storing fp registers
1524 in SFmode, DFmode and XFmode */
1525 6, /* cost of moving MMX register */
1526 {12, 12}, /* cost of loading MMX registers
1527 in SImode and DImode */
1528 {12, 12}, /* cost of storing MMX registers
1529 in SImode and DImode */
1530 6, /* cost of moving SSE register */
1531 {12, 12, 12}, /* cost of loading SSE registers
1532 in SImode, DImode and TImode */
1533 {12, 12, 12}, /* cost of storing SSE registers
1534 in SImode, DImode and TImode */
1535 8, /* MMX or SSE register to integer */
1536 8, /* size of l1 cache. */
1537 1024, /* size of l2 cache. */
1538 128, /* size of prefetch block */
1539 8, /* number of parallel prefetches */
1540 1, /* Branch cost */
1541 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1542 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1543 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1544 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1545 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1546 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1547 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1548 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
1549 {100000, unrolled_loop}, {-1, libcall}}}},
1550 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1551 {-1, libcall}}},
1552 {libcall, {{24, loop}, {64, unrolled_loop},
1553 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1554 1, /* scalar_stmt_cost. */
1555 1, /* scalar load_cost. */
1556 1, /* scalar_store_cost. */
1557 1, /* vec_stmt_cost. */
1558 1, /* vec_to_scalar_cost. */
1559 1, /* scalar_to_vec_cost. */
1560 1, /* vec_align_load_cost. */
1561 2, /* vec_unalign_load_cost. */
1562 1, /* vec_store_cost. */
1563 3, /* cond_taken_branch_cost. */
1564 1, /* cond_not_taken_branch_cost. */
1567 static const
1568 struct processor_costs atom_cost = {
1569 COSTS_N_INSNS (1), /* cost of an add instruction */
1570 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1571 COSTS_N_INSNS (1), /* variable shift costs */
1572 COSTS_N_INSNS (1), /* constant shift costs */
1573 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1574 COSTS_N_INSNS (4), /* HI */
1575 COSTS_N_INSNS (3), /* SI */
1576 COSTS_N_INSNS (4), /* DI */
1577 COSTS_N_INSNS (2)}, /* other */
1578 0, /* cost of multiply per each bit set */
1579 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1580 COSTS_N_INSNS (26), /* HI */
1581 COSTS_N_INSNS (42), /* SI */
1582 COSTS_N_INSNS (74), /* DI */
1583 COSTS_N_INSNS (74)}, /* other */
1584 COSTS_N_INSNS (1), /* cost of movsx */
1585 COSTS_N_INSNS (1), /* cost of movzx */
1586 8, /* "large" insn */
1587 17, /* MOVE_RATIO */
1588 2, /* cost for loading QImode using movzbl */
1589 {4, 4, 4}, /* cost of loading integer registers
1590 in QImode, HImode and SImode.
1591 Relative to reg-reg move (2). */
1592 {4, 4, 4}, /* cost of storing integer registers */
1593 4, /* cost of reg,reg fld/fst */
1594 {12, 12, 12}, /* cost of loading fp registers
1595 in SFmode, DFmode and XFmode */
1596 {6, 6, 8}, /* cost of storing fp registers
1597 in SFmode, DFmode and XFmode */
1598 2, /* cost of moving MMX register */
1599 {8, 8}, /* cost of loading MMX registers
1600 in SImode and DImode */
1601 {8, 8}, /* cost of storing MMX registers
1602 in SImode and DImode */
1603 2, /* cost of moving SSE register */
1604 {8, 8, 8}, /* cost of loading SSE registers
1605 in SImode, DImode and TImode */
1606 {8, 8, 8}, /* cost of storing SSE registers
1607 in SImode, DImode and TImode */
1608 5, /* MMX or SSE register to integer */
1609 32, /* size of l1 cache. */
1610 256, /* size of l2 cache. */
1611 64, /* size of prefetch block */
1612 6, /* number of parallel prefetches */
1613 3, /* Branch cost */
1614 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1615 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1616 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1617 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1618 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1619 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1620 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1621 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1622 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1623 {{libcall, {{8, loop}, {15, unrolled_loop},
1624 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1625 {libcall, {{24, loop}, {32, unrolled_loop},
1626 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1627 1, /* scalar_stmt_cost. */
1628 1, /* scalar load_cost. */
1629 1, /* scalar_store_cost. */
1630 1, /* vec_stmt_cost. */
1631 1, /* vec_to_scalar_cost. */
1632 1, /* scalar_to_vec_cost. */
1633 1, /* vec_align_load_cost. */
1634 2, /* vec_unalign_load_cost. */
1635 1, /* vec_store_cost. */
1636 3, /* cond_taken_branch_cost. */
1637 1, /* cond_not_taken_branch_cost. */
1640 /* Generic64 should produce code tuned for Nocona and K8. */
1641 static const
1642 struct processor_costs generic64_cost = {
1643 COSTS_N_INSNS (1), /* cost of an add instruction */
1644   /* On all chips taken into consideration, lea is 2 cycles or more.  With
1645      this cost, however, our current implementation of synth_mult results in
1646      the use of unnecessary temporary registers, causing regressions on several
1647      SPECfp benchmarks.  */
1648 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1649 COSTS_N_INSNS (1), /* variable shift costs */
1650 COSTS_N_INSNS (1), /* constant shift costs */
1651 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1652 COSTS_N_INSNS (4), /* HI */
1653 COSTS_N_INSNS (3), /* SI */
1654 COSTS_N_INSNS (4), /* DI */
1655 COSTS_N_INSNS (2)}, /* other */
1656 0, /* cost of multiply per each bit set */
1657 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1658 COSTS_N_INSNS (26), /* HI */
1659 COSTS_N_INSNS (42), /* SI */
1660 COSTS_N_INSNS (74), /* DI */
1661 COSTS_N_INSNS (74)}, /* other */
1662 COSTS_N_INSNS (1), /* cost of movsx */
1663 COSTS_N_INSNS (1), /* cost of movzx */
1664 8, /* "large" insn */
1665 17, /* MOVE_RATIO */
1666 4, /* cost for loading QImode using movzbl */
1667 {4, 4, 4}, /* cost of loading integer registers
1668 in QImode, HImode and SImode.
1669 Relative to reg-reg move (2). */
1670 {4, 4, 4}, /* cost of storing integer registers */
1671 4, /* cost of reg,reg fld/fst */
1672 {12, 12, 12}, /* cost of loading fp registers
1673 in SFmode, DFmode and XFmode */
1674 {6, 6, 8}, /* cost of storing fp registers
1675 in SFmode, DFmode and XFmode */
1676 2, /* cost of moving MMX register */
1677 {8, 8}, /* cost of loading MMX registers
1678 in SImode and DImode */
1679 {8, 8}, /* cost of storing MMX registers
1680 in SImode and DImode */
1681 2, /* cost of moving SSE register */
1682 {8, 8, 8}, /* cost of loading SSE registers
1683 in SImode, DImode and TImode */
1684 {8, 8, 8}, /* cost of storing SSE registers
1685 in SImode, DImode and TImode */
1686 5, /* MMX or SSE register to integer */
1687 32, /* size of l1 cache. */
1688 512, /* size of l2 cache. */
1689 64, /* size of prefetch block */
1690 6, /* number of parallel prefetches */
1691   /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1692      value is increased to the perhaps more appropriate value of 5.  */
1693 3, /* Branch cost */
1694 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1695 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1696 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1697 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1698 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1699 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1700 {DUMMY_STRINGOP_ALGS,
1701 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1702 {DUMMY_STRINGOP_ALGS,
1703 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1704 1, /* scalar_stmt_cost. */
1705 1, /* scalar load_cost. */
1706 1, /* scalar_store_cost. */
1707 1, /* vec_stmt_cost. */
1708 1, /* vec_to_scalar_cost. */
1709 1, /* scalar_to_vec_cost. */
1710 1, /* vec_align_load_cost. */
1711 2, /* vec_unalign_load_cost. */
1712 1, /* vec_store_cost. */
1713 3, /* cond_taken_branch_cost. */
1714 1, /* cond_not_taken_branch_cost. */
1717 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1718 Athlon and K8. */
1719 static const
1720 struct processor_costs generic32_cost = {
1721 COSTS_N_INSNS (1), /* cost of an add instruction */
1722 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1723 COSTS_N_INSNS (1), /* variable shift costs */
1724 COSTS_N_INSNS (1), /* constant shift costs */
1725 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1726 COSTS_N_INSNS (4), /* HI */
1727 COSTS_N_INSNS (3), /* SI */
1728 COSTS_N_INSNS (4), /* DI */
1729 COSTS_N_INSNS (2)}, /* other */
1730 0, /* cost of multiply per each bit set */
1731 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1732 COSTS_N_INSNS (26), /* HI */
1733 COSTS_N_INSNS (42), /* SI */
1734 COSTS_N_INSNS (74), /* DI */
1735 COSTS_N_INSNS (74)}, /* other */
1736 COSTS_N_INSNS (1), /* cost of movsx */
1737 COSTS_N_INSNS (1), /* cost of movzx */
1738 8, /* "large" insn */
1739 17, /* MOVE_RATIO */
1740 4, /* cost for loading QImode using movzbl */
1741 {4, 4, 4}, /* cost of loading integer registers
1742 in QImode, HImode and SImode.
1743 Relative to reg-reg move (2). */
1744 {4, 4, 4}, /* cost of storing integer registers */
1745 4, /* cost of reg,reg fld/fst */
1746 {12, 12, 12}, /* cost of loading fp registers
1747 in SFmode, DFmode and XFmode */
1748 {6, 6, 8}, /* cost of storing fp registers
1749 in SFmode, DFmode and XFmode */
1750 2, /* cost of moving MMX register */
1751 {8, 8}, /* cost of loading MMX registers
1752 in SImode and DImode */
1753 {8, 8}, /* cost of storing MMX registers
1754 in SImode and DImode */
1755 2, /* cost of moving SSE register */
1756 {8, 8, 8}, /* cost of loading SSE registers
1757 in SImode, DImode and TImode */
1758 {8, 8, 8}, /* cost of storing SSE registers
1759 in SImode, DImode and TImode */
1760 5, /* MMX or SSE register to integer */
1761 32, /* size of l1 cache. */
1762 256, /* size of l2 cache. */
1763 64, /* size of prefetch block */
1764 6, /* number of parallel prefetches */
1765 3, /* Branch cost */
1766 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1767 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1768 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1769 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1770 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1771 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1772 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1773 DUMMY_STRINGOP_ALGS},
1774 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1775 DUMMY_STRINGOP_ALGS},
1776 1, /* scalar_stmt_cost. */
1777 1, /* scalar load_cost. */
1778 1, /* scalar_store_cost. */
1779 1, /* vec_stmt_cost. */
1780 1, /* vec_to_scalar_cost. */
1781 1, /* scalar_to_vec_cost. */
1782 1, /* vec_align_load_cost. */
1783 2, /* vec_unalign_load_cost. */
1784 1, /* vec_store_cost. */
1785 3, /* cond_taken_branch_cost. */
1786 1, /* cond_not_taken_branch_cost. */
1789 const struct processor_costs *ix86_cost = &pentium_cost;
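/* A minimal sketch of how the cost tables above are consumed.  The
   ix86_cost pointer is retargeted at option-override time to the table
   matching the selected -mtune, and the RTL cost hooks read individual
   fields from it (field names as declared for struct processor_costs in
   i386.h; the surrounding rtx-cost plumbing is elided):

     *total = ix86_cost->add;                          (register-register add)
     *total = ix86_cost->fadd;                         (FP add or sub)
     *total = ix86_cost->mult_init[MODE_INDEX (mode)]
              + nbits * ix86_cost->mult_bit;           (integer multiply)

   COSTS_N_INSNS (N) expresses N average-size instructions in the units
   used by the middle-end rtx cost framework.  */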
1791 /* Processor feature/optimization bitmasks. */
1792 #define m_386 (1<<PROCESSOR_I386)
1793 #define m_486 (1<<PROCESSOR_I486)
1794 #define m_PENT (1<<PROCESSOR_PENTIUM)
1795 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1796 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1797 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1798 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1799 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1800 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1801 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1802 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1803 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1804 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1805 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1806 #define m_ATOM (1<<PROCESSOR_ATOM)
1808 #define m_GEODE (1<<PROCESSOR_GEODE)
1809 #define m_K6 (1<<PROCESSOR_K6)
1810 #define m_K6_GEODE (m_K6 | m_GEODE)
1811 #define m_K8 (1<<PROCESSOR_K8)
1812 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1813 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1814 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1815 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1816 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1817 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1 | m_BTVER1)
1819 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1820 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1822 /* Generic instruction choice should be a common subset of supported CPUs
1823 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1824 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
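/* Sketch of how these masks are applied: the active -mtune processor
   contributes a single bit, and every entry of the tuning table below is
   tested against it when ix86_tune_features is filled in, roughly as
   ix86_option_override_internal does:

     ix86_tune_mask = 1 << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
	 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);  */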
1826 /* Feature tests against the various tunings. */
1827 unsigned char ix86_tune_features[X86_TUNE_LAST];
1829 /* Feature tests against the various tunings used to create ix86_tune_features
1830 based on the processor mask. */
1831 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1832 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1833     negatively, so enabling it for Generic64 seems like a good code-size
1834     tradeoff.  We can't enable it for 32bit generic because it does not
1835     work well with PPro-based chips.  */
1836 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2I7_64 | m_GENERIC64,
1838 /* X86_TUNE_PUSH_MEMORY */
1839 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1840 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1842 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1843 m_486 | m_PENT,
1845 /* X86_TUNE_UNROLL_STRLEN */
1846 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1847 | m_CORE2I7 | m_GENERIC,
1849 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1850 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1851 | m_CORE2I7 | m_GENERIC,
1853   /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1854      on simulation results.  But after P4 was made, no performance benefit
1855      was observed with branch hints; they also increase code size.
1856      As a result, icc never generates branch hints.  */
1859 /* X86_TUNE_DOUBLE_WITH_ADD */
1860 ~m_386,
1862 /* X86_TUNE_USE_SAHF */
1863 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_BTVER1
1864 | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1866 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1867 partial dependencies. */
1868 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1869 | m_CORE2I7 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1871 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1872      register stalls on the Generic32 compilation setting as well.  However,
1873      in the current implementation the partial register stalls are not eliminated
1874      very well - they can be introduced via subregs synthesized by combine
1875 and can happen in caller/callee saving sequences. Because this option
1876 pays back little on PPro based chips and is in conflict with partial reg
1877 dependencies used by Athlon/P4 based chips, it is better to leave it off
1878 for generic32 for now. */
1879 m_PPRO,
1881 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1882 m_CORE2I7 | m_GENERIC,
1884 /* X86_TUNE_USE_HIMODE_FIOP */
1885 m_386 | m_486 | m_K6_GEODE,
1887 /* X86_TUNE_USE_SIMODE_FIOP */
1888 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2I7 | m_GENERIC),
1890 /* X86_TUNE_USE_MOV0 */
1891 m_K6,
1893 /* X86_TUNE_USE_CLTD */
1894 ~(m_PENT | m_ATOM | m_K6 | m_CORE2I7 | m_GENERIC),
1896 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1897 m_PENT4,
1899 /* X86_TUNE_SPLIT_LONG_MOVES */
1900 m_PPRO,
1902 /* X86_TUNE_READ_MODIFY_WRITE */
1903 ~m_PENT,
1905 /* X86_TUNE_READ_MODIFY */
1906 ~(m_PENT | m_PPRO),
1908 /* X86_TUNE_PROMOTE_QIMODE */
1909 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1910 | m_CORE2I7 | m_GENERIC /* | m_PENT4 ? */,
1912 /* X86_TUNE_FAST_PREFIX */
1913 ~(m_PENT | m_486 | m_386),
1915 /* X86_TUNE_SINGLE_STRINGOP */
1916 m_386 | m_PENT4 | m_NOCONA,
1918 /* X86_TUNE_QIMODE_MATH */
1921 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1922 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1923 might be considered for Generic32 if our scheme for avoiding partial
1924 stalls was more effective. */
1925 ~m_PPRO,
1927 /* X86_TUNE_PROMOTE_QI_REGS */
1930 /* X86_TUNE_PROMOTE_HI_REGS */
1931 m_PPRO,
1933 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1934 over esp addition. */
1935 m_386 | m_486 | m_PENT | m_PPRO,
1937 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1938 over esp addition. */
1939 m_PENT,
1941 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1942 over esp subtraction. */
1943 m_386 | m_486 | m_PENT | m_K6_GEODE,
1945 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1946 over esp subtraction. */
1947 m_PENT | m_K6_GEODE,
1949 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1950 for DFmode copies */
1951 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
1952 | m_GENERIC | m_GEODE),
1954 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1955 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1957 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1958      conflict here in between PPro/Pentium4 based chips that treat 128bit
1959      SSE registers as single units versus K8 based chips that divide SSE
1960      registers into two 64bit halves.  This knob promotes all store destinations
1961      to be 128bit to allow register renaming on 128bit SSE units, but usually
1962      results in one extra microop on 64bit SSE units.  Experimental results
1963      show that disabling this option on P4 brings over 20% SPECfp regression,
1964 while enabling it on K8 brings roughly 2.4% regression that can be partly
1965 masked by careful scheduling of moves. */
1966 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7 | m_GENERIC
1967 | m_AMDFAM10 | m_BDVER1,
1969 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1970 m_AMDFAM10 | m_BDVER1 | m_BTVER1 | m_COREI7,
1972 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1973 m_BDVER1 | m_COREI7,
1975 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1976 m_BDVER1,
1978 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1979 are resolved on SSE register parts instead of whole registers, so we may
1980 maintain just lower part of scalar values in proper format leaving the
1981 upper part undefined. */
1982 m_ATHLON_K8,
1984 /* X86_TUNE_SSE_TYPELESS_STORES */
1985 m_AMD_MULTIPLE,
1987 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1988 m_PPRO | m_PENT4 | m_NOCONA,
1990 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1991 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1993 /* X86_TUNE_PROLOGUE_USING_MOVE */
1994 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1996 /* X86_TUNE_EPILOGUE_USING_MOVE */
1997 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1999 /* X86_TUNE_SHIFT1 */
2000 ~m_486,
2002 /* X86_TUNE_USE_FFREEP */
2003 m_AMD_MULTIPLE,
2005 /* X86_TUNE_INTER_UNIT_MOVES */
2006 ~(m_AMD_MULTIPLE | m_GENERIC),
2008 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2009 ~(m_AMDFAM10 | m_BDVER1),
2011 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2012 than 4 branch instructions in the 16 byte window. */
2013 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2I7
2014 | m_GENERIC,
2016 /* X86_TUNE_SCHEDULE */
2017 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2I7
2018 | m_GENERIC,
2020 /* X86_TUNE_USE_BT */
2021 m_AMD_MULTIPLE | m_ATOM | m_CORE2I7 | m_GENERIC,
2023 /* X86_TUNE_USE_INCDEC */
2024 ~(m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC | m_ATOM),
2026 /* X86_TUNE_PAD_RETURNS */
2027 m_AMD_MULTIPLE | m_CORE2I7 | m_GENERIC,
2029   /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions.  */
2030 m_ATOM,
2032 /* X86_TUNE_EXT_80387_CONSTANTS */
2033 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
2034 | m_CORE2I7 | m_GENERIC,
2036 /* X86_TUNE_SHORTEN_X87_SSE */
2037 ~m_K8,
2039 /* X86_TUNE_AVOID_VECTOR_DECODE */
2040 m_K8 | m_CORE2I7_64 | m_GENERIC64,
2042 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
2043 and SImode multiply, but 386 and 486 do HImode multiply faster. */
2044 ~(m_386 | m_486),
2046 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2047 vector path on AMD machines. */
2048 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
2050 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2051 machines. */
2052 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
2054 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2055 than a MOV. */
2056 m_PENT,
2058 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2059 but one byte longer. */
2060 m_PENT,
2062 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2063 operand that cannot be represented using a modRM byte. The XOR
2064 replacement is long decoded, so this split helps here as well. */
2065 m_K6,
2067 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2068 from FP to FP. */
2069 m_AMDFAM10 | m_CORE2I7 | m_GENERIC,
2071 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2072 from integer to FP. */
2073 m_AMDFAM10,
2075 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2076 with a subsequent conditional jump instruction into a single
2077 compare-and-branch uop. */
2078 m_BDVER1,
2080 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2081 will impact LEA instruction selection. */
2082 m_ATOM,
2084 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2085 instructions. */
2086 ~m_ATOM,
2088   /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2089 at -O3. For the moment, the prefetching seems badly tuned for Intel
2090 chips. */
2091 m_K6_GEODE | m_AMD_MULTIPLE
2094 /* Feature tests against the various architecture variations. */
2095 unsigned char ix86_arch_features[X86_ARCH_LAST];
2097 /* Feature tests against the various architecture variations, used to create
2098 ix86_arch_features based on the processor mask. */
2099 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2100 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2101 ~(m_386 | m_486 | m_PENT | m_K6),
2103 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2104 ~m_386,
2106 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2107 ~(m_386 | m_486),
2109 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2110 ~m_386,
2112 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2113 ~m_386,
2116 static const unsigned int x86_accumulate_outgoing_args
2117 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
2118 | m_GENERIC;
2120 static const unsigned int x86_arch_always_fancy_math_387
2121 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
2122 | m_NOCONA | m_CORE2I7 | m_GENERIC;
2124 /* In case the average insn count for a single function invocation is
2125 lower than this constant, emit fast (but longer) prologue and
2126 epilogue code. */
2127 #define FAST_PROLOGUE_INSN_COUNT 20
2129 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2130 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2131 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2132 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2134 /* Array of the smallest class containing reg number REGNO, indexed by
2135 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2137 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2139 /* ax, dx, cx, bx */
2140 AREG, DREG, CREG, BREG,
2141 /* si, di, bp, sp */
2142 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2143 /* FP registers */
2144 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2145 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2146 /* arg pointer */
2147 NON_Q_REGS,
2148 /* flags, fpsr, fpcr, frame */
2149 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2150 /* SSE registers */
2151 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2152 SSE_REGS, SSE_REGS,
2153 /* MMX registers */
2154 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2155 MMX_REGS, MMX_REGS,
2156 /* REX registers */
2157 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2158 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2159 /* SSE REX registers */
2160 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2161 SSE_REGS, SSE_REGS,
2164 /* The "default" register map used in 32bit mode. */
2166 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2168 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2169 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2170 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2171 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2172 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2173 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2174 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2177 /* The "default" register map used in 64bit mode. */
2179 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2181 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2182 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2183 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2184 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2185 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2186 8,9,10,11,12,13,14,15, /* extended integer registers */
2187 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2190 /* Define the register numbers to be used in Dwarf debugging information.
2191 The SVR4 reference port C compiler uses the following register numbers
2192 in its Dwarf output code:
2193 0 for %eax (gcc regno = 0)
2194 1 for %ecx (gcc regno = 2)
2195 2 for %edx (gcc regno = 1)
2196 3 for %ebx (gcc regno = 3)
2197 4 for %esp (gcc regno = 7)
2198 5 for %ebp (gcc regno = 6)
2199 6 for %esi (gcc regno = 4)
2200 7 for %edi (gcc regno = 5)
2201 The following three DWARF register numbers are never generated by
2202 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2203 believes these numbers have these meanings.
2204 8 for %eip (no gcc equivalent)
2205 9 for %eflags (gcc regno = 17)
2206 10 for %trapno (no gcc equivalent)
2207 It is not at all clear how we should number the FP stack registers
2208 for the x86 architecture. If the version of SDB on x86/svr4 were
2209 a bit less brain dead with respect to floating-point then we would
2210 have a precedent to follow with respect to DWARF register numbers
2211 for x86 FP registers, but the SDB on x86/svr4 is so completely
2212 broken with respect to FP registers that it is hardly worth thinking
2213 of it as something to strive for compatibility with.
2214 The version of x86/svr4 SDB I have at the moment does (partially)
2215 seem to believe that DWARF register number 11 is associated with
2216 the x86 register %st(0), but that's about all. Higher DWARF
2217 register numbers don't seem to be associated with anything in
2218 particular, and even for DWARF regno 11, SDB only seems to under-
2219 stand that it should say that a variable lives in %st(0) (when
2220 asked via an `=' command) if we said it was in DWARF regno 11,
2221 but SDB still prints garbage when asked for the value of the
2222 variable in question (via a `/' command).
2223 (Also note that the labels SDB prints for various FP stack regs
2224 when doing an `x' command are all wrong.)
2225 Note that these problems generally don't affect the native SVR4
2226 C compiler because it doesn't allow the use of -O with -g and
2227 because when it is *not* optimizing, it allocates a memory
2228 location for each floating-point variable, and the memory
2229 location is what gets described in the DWARF AT_location
2230 attribute for the variable in question.
2231 Regardless of the severe mental illness of the x86/svr4 SDB, we
2232 do something sensible here and we use the following DWARF
2233 register numbers. Note that these are all stack-top-relative
2234 numbers.
2235 11 for %st(0) (gcc regno = 8)
2236 12 for %st(1) (gcc regno = 9)
2237 13 for %st(2) (gcc regno = 10)
2238 14 for %st(3) (gcc regno = 11)
2239 15 for %st(4) (gcc regno = 12)
2240 16 for %st(5) (gcc regno = 13)
2241 17 for %st(6) (gcc regno = 14)
2242 18 for %st(7) (gcc regno = 15)
2244 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2246 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2247 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2248 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2249 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2250 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2251 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2252 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
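/* Worked example of the maps above: %ebp is gcc regno 6, so the SVR4
   DWARF number is svr4_dbx_register_map[6] == 5, while the "default"
   32-bit map gives dbx_register_map[6] == 4.  Debug output indexes these
   arrays with the gcc register number (e.g. via DBX_REGISTER_NUMBER).  */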
2255 /* Define parameter passing and return registers. */
2257 static int const x86_64_int_parameter_registers[6] =
2259 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2262 static int const x86_64_ms_abi_int_parameter_registers[4] =
2264 CX_REG, DX_REG, R8_REG, R9_REG
2267 static int const x86_64_int_return_registers[4] =
2269 AX_REG, DX_REG, DI_REG, SI_REG
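/* Example of the calling convention these arrays encode (SysV 64bit ABI):
   for a prototype such as

     long f (long a, long b, long c);

   the arguments land in DI_REG, SI_REG and DX_REG (%rdi, %rsi, %rdx),
   taken in order from x86_64_int_parameter_registers, and the result is
   returned in AX_REG (%rax), the first entry of
   x86_64_int_return_registers.  Under the MS ABI the first three integer
   arguments would instead use CX_REG, DX_REG and R8_REG.  */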
2272 /* Define the structure for the machine field in struct function. */
2274 struct GTY(()) stack_local_entry {
2275 unsigned short mode;
2276 unsigned short n;
2277 rtx rtl;
2278 struct stack_local_entry *next;
2281 /* Structure describing stack frame layout.
2282    Stack grows downward:
2284    [arguments]
2285                                            <- ARG_POINTER
2286    saved pc
2288    saved static chain                      if ix86_static_chain_on_stack
2290    saved frame pointer                     if frame_pointer_needed
2291                                            <- HARD_FRAME_POINTER
2292    [saved regs]
2293                                            <- regs_save_offset
2294    [padding0]
2296    [saved SSE regs]
2297                                            <- sse_regs_save_offset
2298    [padding1]           |
2299                         |                  <- FRAME_POINTER
2300    [va_arg registers]   |
2302    [frame]              |
2304    [padding2]           | = to_allocate
2305                                            <- STACK_POINTER
2307 struct ix86_frame
2309 int nsseregs;
2310 int nregs;
2311 int va_arg_size;
2312 int red_zone_size;
2313 int outgoing_arguments_size;
2314 HOST_WIDE_INT frame;
2316 /* The offsets relative to ARG_POINTER. */
2317 HOST_WIDE_INT frame_pointer_offset;
2318 HOST_WIDE_INT hard_frame_pointer_offset;
2319 HOST_WIDE_INT stack_pointer_offset;
2320 HOST_WIDE_INT hfp_save_offset;
2321 HOST_WIDE_INT reg_save_offset;
2322 HOST_WIDE_INT sse_reg_save_offset;
2324 /* When save_regs_using_mov is set, emit prologue using
2325 move instead of push instructions. */
2326 bool save_regs_using_mov;
2329 /* Which cpu are we scheduling for. */
2330 enum attr_cpu ix86_schedule;
2332 /* Which cpu are we optimizing for. */
2333 enum processor_type ix86_tune;
2335 /* Which instruction set architecture to use. */
2336 enum processor_type ix86_arch;
2338 /* true if sse prefetch instruction is not NOOP. */
2339 int x86_prefetch_sse;
2341 /* -mstackrealign option */
2342 static const char ix86_force_align_arg_pointer_string[]
2343 = "force_align_arg_pointer";
2345 static rtx (*ix86_gen_leave) (void);
2346 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2347 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2348 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2349 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2350 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2351 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2352 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2353 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2354 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2356 /* Preferred alignment for stack boundary in bits. */
2357 unsigned int ix86_preferred_stack_boundary;
2359 /* Alignment for incoming stack boundary in bits specified at
2360 command line. */
2361 static unsigned int ix86_user_incoming_stack_boundary;
2363 /* Default alignment for incoming stack boundary in bits. */
2364 static unsigned int ix86_default_incoming_stack_boundary;
2366 /* Alignment for incoming stack boundary in bits. */
2367 unsigned int ix86_incoming_stack_boundary;
2369 /* Calling abi specific va_list type nodes. */
2370 static GTY(()) tree sysv_va_list_type_node;
2371 static GTY(()) tree ms_va_list_type_node;
2373 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2374 char internal_label_prefix[16];
2375 int internal_label_prefix_len;
2377 /* Fence to use after loop using movnt. */
2378 tree x86_mfence;
2380 /* Register class used for passing a given 64bit part of the argument.
2381    These represent classes as documented by the psABI, with the exception
2382    of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2383    uses an SF or DFmode move instead of DImode to avoid reformatting penalties.
2385 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2386 whenever possible (upper half does contain padding). */
2387 enum x86_64_reg_class
2389 X86_64_NO_CLASS,
2390 X86_64_INTEGER_CLASS,
2391 X86_64_INTEGERSI_CLASS,
2392 X86_64_SSE_CLASS,
2393 X86_64_SSESF_CLASS,
2394 X86_64_SSEDF_CLASS,
2395 X86_64_SSEUP_CLASS,
2396 X86_64_X87_CLASS,
2397 X86_64_X87UP_CLASS,
2398 X86_64_COMPLEX_X87_CLASS,
2399 X86_64_MEMORY_CLASS
2402 #define MAX_CLASSES 4
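/* Illustrative classification under this scheme (a sketch of what
   classify_argument computes; the struct is just an example type):

     struct s { double d; int i; };     -- two eightbytes
       word 0: X86_64_SSEDF_CLASS       -- the double travels in an SSE reg
       word 1: X86_64_INTEGERSI_CLASS   -- the int travels in a GPR

   Aggregates that do not fit the register classes (e.g. anything larger
   than MAX_CLASSES eightbytes) end up as X86_64_MEMORY_CLASS and are
   passed on the stack.  */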
2404 /* Table of constants used by fldpi, fldln2, etc.... */
2405 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2406 static bool ext_80387_constants_init = 0;
2409 static struct machine_function * ix86_init_machine_status (void);
2410 static rtx ix86_function_value (const_tree, const_tree, bool);
2411 static bool ix86_function_value_regno_p (const unsigned int);
2412 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2413 const_tree);
2414 static rtx ix86_static_chain (const_tree, bool);
2415 static int ix86_function_regparm (const_tree, const_tree);
2416 static void ix86_compute_frame_layout (struct ix86_frame *);
2417 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2418 rtx, rtx, int);
2419 static void ix86_add_new_builtins (int);
2420 static rtx ix86_expand_vec_perm_builtin (tree);
2421 static tree ix86_canonical_va_list_type (tree);
2422 static void predict_jump (int);
2423 static unsigned int split_stack_prologue_scratch_regno (void);
2424 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2426 enum ix86_function_specific_strings
2428 IX86_FUNCTION_SPECIFIC_ARCH,
2429 IX86_FUNCTION_SPECIFIC_TUNE,
2430 IX86_FUNCTION_SPECIFIC_MAX
2433 static char *ix86_target_string (int, int, const char *, const char *,
2434 enum fpmath_unit, bool);
2435 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2436 static void ix86_function_specific_save (struct cl_target_option *);
2437 static void ix86_function_specific_restore (struct cl_target_option *);
2438 static void ix86_function_specific_print (FILE *, int,
2439 struct cl_target_option *);
2440 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2441 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2442 struct gcc_options *);
2443 static bool ix86_can_inline_p (tree, tree);
2444 static void ix86_set_current_function (tree);
2445 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2447 static enum calling_abi ix86_function_abi (const_tree);
2450 #ifndef SUBTARGET32_DEFAULT_CPU
2451 #define SUBTARGET32_DEFAULT_CPU "i386"
2452 #endif
2454 /* The svr4 ABI for the i386 says that records and unions are returned
2455 in memory. */
2456 #ifndef DEFAULT_PCC_STRUCT_RETURN
2457 #define DEFAULT_PCC_STRUCT_RETURN 1
2458 #endif
2460 /* Whether -mtune= or -march= were specified */
2461 static int ix86_tune_defaulted;
2462 static int ix86_arch_specified;
2464 /* Define a set of ISAs which are available when a given ISA is
2465 enabled. MMX and SSE ISAs are handled separately. */
2467 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2468 #define OPTION_MASK_ISA_3DNOW_SET \
2469 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2471 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2472 #define OPTION_MASK_ISA_SSE2_SET \
2473 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2474 #define OPTION_MASK_ISA_SSE3_SET \
2475 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2476 #define OPTION_MASK_ISA_SSSE3_SET \
2477 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2478 #define OPTION_MASK_ISA_SSE4_1_SET \
2479 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2480 #define OPTION_MASK_ISA_SSE4_2_SET \
2481 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2482 #define OPTION_MASK_ISA_AVX_SET \
2483 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2484 #define OPTION_MASK_ISA_FMA_SET \
2485 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2487 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
2488 as -msse4.2. */
2489 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2491 #define OPTION_MASK_ISA_SSE4A_SET \
2492 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2493 #define OPTION_MASK_ISA_FMA4_SET \
2494 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2495 | OPTION_MASK_ISA_AVX_SET)
2496 #define OPTION_MASK_ISA_XOP_SET \
2497 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2498 #define OPTION_MASK_ISA_LWP_SET \
2499 OPTION_MASK_ISA_LWP
2501 /* AES and PCLMUL need SSE2 because they use xmm registers */
2502 #define OPTION_MASK_ISA_AES_SET \
2503 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2504 #define OPTION_MASK_ISA_PCLMUL_SET \
2505 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2507 #define OPTION_MASK_ISA_ABM_SET \
2508 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2510 #define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
2511 #define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
2512 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2513 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2514 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2515 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2516 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2518 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2519 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2520 #define OPTION_MASK_ISA_F16C_SET \
2521 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
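/* Example of how the _SET macros chain: a single -msse4.1 on the command
   line enables everything SSE4.1 depends on, because the macro expands
   transitively:

     OPTION_MASK_ISA_SSE4_1_SET
       == OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3
	| OPTION_MASK_ISA_SSE3   | OPTION_MASK_ISA_SSE2
	| OPTION_MASK_ISA_SSE  */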
2523 /* Define a set of ISAs which aren't available when a given ISA is
2524 disabled. MMX and SSE ISAs are handled separately. */
2526 #define OPTION_MASK_ISA_MMX_UNSET \
2527 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2528 #define OPTION_MASK_ISA_3DNOW_UNSET \
2529 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2530 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2532 #define OPTION_MASK_ISA_SSE_UNSET \
2533 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2534 #define OPTION_MASK_ISA_SSE2_UNSET \
2535 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2536 #define OPTION_MASK_ISA_SSE3_UNSET \
2537 (OPTION_MASK_ISA_SSE3 \
2538 | OPTION_MASK_ISA_SSSE3_UNSET \
2539 | OPTION_MASK_ISA_SSE4A_UNSET )
2540 #define OPTION_MASK_ISA_SSSE3_UNSET \
2541 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2542 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2543 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2544 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2545 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2546 #define OPTION_MASK_ISA_AVX_UNSET \
2547 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2548 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2549 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2551 /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
2552    as -mno-sse4.1.  */
2553 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2555 #define OPTION_MASK_ISA_SSE4A_UNSET \
2556 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2558 #define OPTION_MASK_ISA_FMA4_UNSET \
2559 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2560 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2561 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2563 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2564 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2565 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2566 #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
2567 #define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
2568 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2569 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2570 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2571 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2572 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2574 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2575 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2576 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
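/* The _UNSET macros chain in the opposite direction: -mno-sse3 must also
   turn off everything that requires SSE3, so

     OPTION_MASK_ISA_SSE3_UNSET

   expands to the union of the SSE3, SSSE3, SSE4.1, SSE4.2, SSE4A, AVX,
   FMA, FMA4, XOP and F16C masks.  */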
2578 /* Vectorization library interface and handlers. */
2579 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2581 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2582 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2584 /* Processor target table, indexed by processor number */
2585 struct ptt
2587 const struct processor_costs *cost; /* Processor costs */
2588 const int align_loop; /* Default alignments. */
2589 const int align_loop_max_skip;
2590 const int align_jump;
2591 const int align_jump_max_skip;
2592 const int align_func;
2595 static const struct ptt processor_target_table[PROCESSOR_max] =
2597 {&i386_cost, 4, 3, 4, 3, 4},
2598 {&i486_cost, 16, 15, 16, 15, 16},
2599 {&pentium_cost, 16, 7, 16, 7, 16},
2600 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2601 {&geode_cost, 0, 0, 0, 0, 0},
2602 {&k6_cost, 32, 7, 32, 7, 32},
2603 {&athlon_cost, 16, 7, 16, 7, 16},
2604 {&pentium4_cost, 0, 0, 0, 0, 0},
2605 {&k8_cost, 16, 7, 16, 7, 16},
2606 {&nocona_cost, 0, 0, 0, 0, 0},
2607 /* Core 2 32-bit. */
2608 {&generic32_cost, 16, 10, 16, 10, 16},
2609 /* Core 2 64-bit. */
2610 {&generic64_cost, 16, 10, 16, 10, 16},
2611 /* Core i7 32-bit. */
2612 {&generic32_cost, 16, 10, 16, 10, 16},
2613 /* Core i7 64-bit. */
2614 {&generic64_cost, 16, 10, 16, 10, 16},
2615 {&generic32_cost, 16, 7, 16, 7, 16},
2616 {&generic64_cost, 16, 10, 16, 10, 16},
2617 {&amdfam10_cost, 32, 24, 32, 7, 32},
2618 {&bdver1_cost, 32, 24, 32, 7, 32},
2619 {&btver1_cost, 32, 24, 32, 7, 32},
2620 {&atom_cost, 16, 7, 16, 7, 16}
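/* Sketch of how this table is consulted at option-override time: the
   resolved ix86_tune value indexes it to pick both the active cost table
   and the default code alignments, roughly (the optimize-for-size path is
   omitted here):

     ix86_cost = processor_target_table[ix86_tune].cost;
     if (align_loops == 0)
       align_loops = processor_target_table[ix86_tune].align_loop;
     if (align_jumps == 0)
       align_jumps = processor_target_table[ix86_tune].align_jump;  */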
2623 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2625 "generic",
2626 "i386",
2627 "i486",
2628 "pentium",
2629 "pentium-mmx",
2630 "pentiumpro",
2631 "pentium2",
2632 "pentium3",
2633 "pentium4",
2634 "pentium-m",
2635 "prescott",
2636 "nocona",
2637 "core2",
2638 "corei7",
2639 "atom",
2640 "geode",
2641 "k6",
2642 "k6-2",
2643 "k6-3",
2644 "athlon",
2645 "athlon-4",
2646 "k8",
2647 "amdfam10",
2648 "bdver1",
2649 "btver1"
2652 /* Return true if a red-zone is in use. */
2654 static inline bool
2655 ix86_using_red_zone (void)
2657 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2660 /* Implement TARGET_HANDLE_OPTION. */
2662 static bool
2663 ix86_handle_option (struct gcc_options *opts,
2664 struct gcc_options *opts_set ATTRIBUTE_UNUSED,
2665 const struct cl_decoded_option *decoded,
2666 location_t loc)
2668 size_t code = decoded->opt_index;
2669 int value = decoded->value;
2671 switch (code)
2673 case OPT_mmmx:
2674 if (value)
2676 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2677 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2679 else
2681 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2682 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2684 return true;
2686 case OPT_m3dnow:
2687 if (value)
2689 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2690 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2692 else
2694 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2695 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2697 return true;
2699 case OPT_m3dnowa:
2700 return false;
2702 case OPT_msse:
2703 if (value)
2705 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2706 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2708 else
2710 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2711 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2713 return true;
2715 case OPT_msse2:
2716 if (value)
2718 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2719 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2721 else
2723 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2724 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2726 return true;
2728 case OPT_msse3:
2729 if (value)
2731 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2732 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2734 else
2736 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2737 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2739 return true;
2741 case OPT_mssse3:
2742 if (value)
2744 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2745 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2747 else
2749 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2750 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2752 return true;
2754 case OPT_msse4_1:
2755 if (value)
2757 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2758 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2760 else
2762 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2763 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2765 return true;
2767 case OPT_msse4_2:
2768 if (value)
2770 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2771 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2773 else
2775 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2776 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2778 return true;
2780 case OPT_mavx:
2781 if (value)
2783 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2784 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2786 else
2788 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2789 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2791 return true;
2793 case OPT_mfma:
2794 if (value)
2796 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2797 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2799 else
2801 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2802 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2804 return true;
2806 case OPT_msse4:
2807 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2808 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2809 return true;
2811 case OPT_mno_sse4:
2812 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2813 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2814 return true;
2816 case OPT_msse4a:
2817 if (value)
2819 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2820 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2822 else
2824 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2825 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2827 return true;
2829 case OPT_mfma4:
2830 if (value)
2832 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2833 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2835 else
2837 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2838 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2840 return true;
2842 case OPT_mxop:
2843 if (value)
2845 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2846 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2848 else
2850 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2851 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2853 return true;
2855 case OPT_mlwp:
2856 if (value)
2858 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2859 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2861 else
2863 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2864 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2866 return true;
2868 case OPT_mabm:
2869 if (value)
2871 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2872 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2874 else
2876 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2877 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2879 return true;
2881 case OPT_mbmi:
2882 if (value)
2884 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI_SET;
2885 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_SET;
2887 else
2889 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_BMI_UNSET;
2890 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_UNSET;
2892 return true;
2894 case OPT_mtbm:
2895 if (value)
2897 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM_SET;
2898 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_SET;
2900 else
2902 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_TBM_UNSET;
2903 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_UNSET;
2905 return true;
2907 case OPT_mpopcnt:
2908 if (value)
2910 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2911 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2913 else
2915 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2916 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2918 return true;
2920 case OPT_msahf:
2921 if (value)
2923 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2924 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2926 else
2928 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2929 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2931 return true;
2933 case OPT_mcx16:
2934 if (value)
2936 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2937 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2939 else
2941 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2942 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2944 return true;
2946 case OPT_mmovbe:
2947 if (value)
2949 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2950 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2952 else
2954 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2955 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2957 return true;
2959 case OPT_mcrc32:
2960 if (value)
2962 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2963 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2965 else
2967 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2968 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2970 return true;
2972 case OPT_maes:
2973 if (value)
2975 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2976 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2978 else
2980 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2981 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2983 return true;
2985 case OPT_mpclmul:
2986 if (value)
2988 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2989 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2991 else
2993 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2994 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2996 return true;
2998 case OPT_mfsgsbase:
2999 if (value)
3001 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
3002 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
3004 else
3006 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
3007 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
3009 return true;
3011 case OPT_mrdrnd:
3012 if (value)
3014 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
3015 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
3017 else
3019 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
3020 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
3022 return true;
3024 case OPT_mf16c:
3025 if (value)
3027 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
3028 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
3030 else
3032 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
3033 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
3035 return true;
3037 /* Comes from final.c -- no real reason to change it. */
3038 #define MAX_CODE_ALIGN 16
3040 case OPT_malign_loops_:
3041 warning_at (loc, 0, "-malign-loops is obsolete, use -falign-loops");
3042 if (value > MAX_CODE_ALIGN)
3043 error_at (loc, "-malign-loops=%d is not between 0 and %d",
3044 value, MAX_CODE_ALIGN);
3045 else
3046 opts->x_align_loops = 1 << value;
3047 return true;
3049 case OPT_malign_jumps_:
3050 warning_at (loc, 0, "-malign-jumps is obsolete, use -falign-jumps");
3051 if (value > MAX_CODE_ALIGN)
3052 error_at (loc, "-malign-jumps=%d is not between 0 and %d",
3053 value, MAX_CODE_ALIGN);
3054 else
3055 opts->x_align_jumps = 1 << value;
3056 return true;
3058 case OPT_malign_functions_:
3059 warning_at (loc, 0,
3060 "-malign-functions is obsolete, use -falign-functions");
3061 if (value > MAX_CODE_ALIGN)
3062 error_at (loc, "-malign-functions=%d is not between 0 and %d",
3063 value, MAX_CODE_ALIGN);
3064 else
3065 opts->x_align_functions = 1 << value;
3066 return true;
3068 case OPT_mbranch_cost_:
3069 if (value > 5)
3071 error_at (loc, "-mbranch-cost=%d is not between 0 and 5", value);
3072 opts->x_ix86_branch_cost = 5;
3074 return true;
3076 default:
3077 return true;
3081 /* Return a string that documents the current -m options. The caller is
3082 responsible for freeing the string. */
3084 static char *
3085 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
3086 enum fpmath_unit fpmath, bool add_nl_p)
3088 struct ix86_target_opts
3090 const char *option; /* option string */
3091 int mask; /* isa mask options */
3094   /* This table is ordered so that options like -msse4.2 that imply
3095      preceding options match those first.  */
3096 static struct ix86_target_opts isa_opts[] =
3098 { "-m64", OPTION_MASK_ISA_64BIT },
3099 { "-mfma4", OPTION_MASK_ISA_FMA4 },
3100 { "-mfma", OPTION_MASK_ISA_FMA },
3101 { "-mxop", OPTION_MASK_ISA_XOP },
3102 { "-mlwp", OPTION_MASK_ISA_LWP },
3103 { "-msse4a", OPTION_MASK_ISA_SSE4A },
3104 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
3105 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
3106 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
3107 { "-msse3", OPTION_MASK_ISA_SSE3 },
3108 { "-msse2", OPTION_MASK_ISA_SSE2 },
3109 { "-msse", OPTION_MASK_ISA_SSE },
3110 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
3111 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
3112 { "-mmmx", OPTION_MASK_ISA_MMX },
3113 { "-mabm", OPTION_MASK_ISA_ABM },
3114 { "-mbmi", OPTION_MASK_ISA_BMI },
3115 { "-mtbm", OPTION_MASK_ISA_TBM },
3116 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
3117 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
3118 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
3119 { "-maes", OPTION_MASK_ISA_AES },
3120 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
3121 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
3122 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
3123 { "-mf16c", OPTION_MASK_ISA_F16C },
3126 /* Flag options. */
3127 static struct ix86_target_opts flag_opts[] =
3129 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
3130 { "-m80387", MASK_80387 },
3131 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
3132 { "-malign-double", MASK_ALIGN_DOUBLE },
3133 { "-mcld", MASK_CLD },
3134 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
3135 { "-mieee-fp", MASK_IEEE_FP },
3136 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
3137 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
3138 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
3139 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
3140 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
3141 { "-mno-push-args", MASK_NO_PUSH_ARGS },
3142 { "-mno-red-zone", MASK_NO_RED_ZONE },
3143 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
3144 { "-mrecip", MASK_RECIP },
3145 { "-mrtd", MASK_RTD },
3146 { "-msseregparm", MASK_SSEREGPARM },
3147 { "-mstack-arg-probe", MASK_STACK_PROBE },
3148 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
3149 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
3150 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
3151 { "-mvzeroupper", MASK_VZEROUPPER },
3152 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
3153 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
3156 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
3158 char isa_other[40];
3159 char target_other[40];
3160 unsigned num = 0;
3161 unsigned i, j;
3162 char *ret;
3163 char *ptr;
3164 size_t len;
3165 size_t line_len;
3166 size_t sep_len;
3168 memset (opts, '\0', sizeof (opts));
3170 /* Add -march= option. */
3171 if (arch)
3173 opts[num][0] = "-march=";
3174 opts[num++][1] = arch;
3177 /* Add -mtune= option. */
3178 if (tune)
3180 opts[num][0] = "-mtune=";
3181 opts[num++][1] = tune;
3184 /* Pick out the options in isa options. */
3185 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
3187 if ((isa & isa_opts[i].mask) != 0)
3189 opts[num++][0] = isa_opts[i].option;
3190 isa &= ~ isa_opts[i].mask;
3194 if (isa && add_nl_p)
3196 opts[num++][0] = isa_other;
3197 sprintf (isa_other, "(other isa: %#x)", isa);
3200 /* Add flag options. */
3201 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
3203 if ((flags & flag_opts[i].mask) != 0)
3205 opts[num++][0] = flag_opts[i].option;
3206 flags &= ~ flag_opts[i].mask;
3210 if (flags && add_nl_p)
3212 opts[num++][0] = target_other;
3213 sprintf (target_other, "(other flags: %#x)", flags);
3216 /* Add -fpmath= option. */
3217 if (fpmath)
3219 opts[num][0] = "-mfpmath=";
3220 switch ((int) fpmath)
3222 case FPMATH_387:
3223 opts[num++][1] = "387";
3224 break;
3226 case FPMATH_SSE:
3227 opts[num++][1] = "sse";
3228 break;
3230 case FPMATH_387 | FPMATH_SSE:
3231 opts[num++][1] = "sse+387";
3232 break;
3234 default:
3235 gcc_unreachable ();
3239 /* Any options? */
3240 if (num == 0)
3241 return NULL;
3243 gcc_assert (num < ARRAY_SIZE (opts));
3245 /* Size the string. */
3246 len = 0;
3247 sep_len = (add_nl_p) ? 3 : 1;
3248 for (i = 0; i < num; i++)
3250 len += sep_len;
3251 for (j = 0; j < 2; j++)
3252 if (opts[i][j])
3253 len += strlen (opts[i][j]);
3256 /* Build the string. */
3257 ret = ptr = (char *) xmalloc (len);
3258 line_len = 0;
3260 for (i = 0; i < num; i++)
3262 size_t len2[2];
3264 for (j = 0; j < 2; j++)
3265 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3267 if (i != 0)
3269 *ptr++ = ' ';
3270 line_len++;
3272 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3274 *ptr++ = '\\';
3275 *ptr++ = '\n';
3276 line_len = 0;
3280 for (j = 0; j < 2; j++)
3281 if (opts[i][j])
3283 memcpy (ptr, opts[i][j], len2[j]);
3284 ptr += len2[j];
3285 line_len += len2[j];
3289 *ptr = '\0';
3290 gcc_assert (ret + len >= ptr);
3292 return ret;
3295 /* Return true if profiling code should be emitted before the
3296    prologue; otherwise return false.
3297    Note: unsupported x86 "hotfix" combinations are diagnosed with sorry ().  */
3298 static bool
3299 ix86_profile_before_prologue (void)
3301 return flag_fentry != 0;
3304 /* Function that is callable from the debugger to print the current
3305 options. */
3306 void
3307 ix86_debug_options (void)
3309 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3310 ix86_arch_string, ix86_tune_string,
3311 ix86_fpmath, true);
3313 if (opts)
3315 fprintf (stderr, "%s\n\n", opts);
3316 free (opts);
3318 else
3319 fputs ("<no options>\n\n", stderr);
3321 return;
3324 /* Override various settings based on options. If MAIN_ARGS_P, the
3325 options are from the command line, otherwise they are from
3326 attributes. */
3328 static void
3329 ix86_option_override_internal (bool main_args_p)
3331 int i;
3332 unsigned int ix86_arch_mask, ix86_tune_mask;
3333 const bool ix86_tune_specified = (ix86_tune_string != NULL);
3334 const char *prefix;
3335 const char *suffix;
3336 const char *sw;
3338 enum pta_flags
3340 PTA_SSE = 1 << 0,
3341 PTA_SSE2 = 1 << 1,
3342 PTA_SSE3 = 1 << 2,
3343 PTA_MMX = 1 << 3,
3344 PTA_PREFETCH_SSE = 1 << 4,
3345 PTA_3DNOW = 1 << 5,
3346 PTA_3DNOW_A = 1 << 6,
3347 PTA_64BIT = 1 << 7,
3348 PTA_SSSE3 = 1 << 8,
3349 PTA_CX16 = 1 << 9,
3350 PTA_POPCNT = 1 << 10,
3351 PTA_ABM = 1 << 11,
3352 PTA_SSE4A = 1 << 12,
3353 PTA_NO_SAHF = 1 << 13,
3354 PTA_SSE4_1 = 1 << 14,
3355 PTA_SSE4_2 = 1 << 15,
3356 PTA_AES = 1 << 16,
3357 PTA_PCLMUL = 1 << 17,
3358 PTA_AVX = 1 << 18,
3359 PTA_FMA = 1 << 19,
3360 PTA_MOVBE = 1 << 20,
3361 PTA_FMA4 = 1 << 21,
3362 PTA_XOP = 1 << 22,
3363 PTA_LWP = 1 << 23,
3364 PTA_FSGSBASE = 1 << 24,
3365 PTA_RDRND = 1 << 25,
3366 PTA_F16C = 1 << 26,
3367 PTA_BMI = 1 << 27,
3368 PTA_TBM = 1 << 28
3369 /* If this reaches 32, we need to widen the flags field in struct pta below. */
3372 static struct pta
3374 const char *const name; /* processor name or nickname. */
3375 const enum processor_type processor;
3376 const enum attr_cpu schedule;
3377 const unsigned /*enum pta_flags*/ flags;
3379 const processor_alias_table[] =
3381 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3382 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3383 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3384 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3385 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3386 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3387 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3388 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3389 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
3390 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3391 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3392 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
3393 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3394 PTA_MMX | PTA_SSE},
3395 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3396 PTA_MMX | PTA_SSE},
3397 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3398 PTA_MMX | PTA_SSE | PTA_SSE2},
3399 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3400 PTA_MMX | PTA_SSE | PTA_SSE2},
3401 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3402 PTA_MMX | PTA_SSE | PTA_SSE2},
3403 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3404 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
3405 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3406 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3407 | PTA_CX16 | PTA_NO_SAHF},
3408 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
3409 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3410 | PTA_SSSE3 | PTA_CX16},
3411 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
3412 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3413 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
3414 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
3415 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3416 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
3417 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
3418 {"atom", PROCESSOR_ATOM, CPU_ATOM,
3419 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3420 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
3421 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3422 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3423 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3424 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3425 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3426 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3427 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3428 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3429 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3430 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3431 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3432 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3433 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3434 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3435 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3436 {"x86-64", PROCESSOR_K8, CPU_K8,
3437 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3438 {"k8", PROCESSOR_K8, CPU_K8,
3439 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3440 | PTA_SSE2 | PTA_NO_SAHF},
3441 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3442 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3443 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3444 {"opteron", PROCESSOR_K8, CPU_K8,
3445 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3446 | PTA_SSE2 | PTA_NO_SAHF},
3447 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3448 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3449 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3450 {"athlon64", PROCESSOR_K8, CPU_K8,
3451 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3452 | PTA_SSE2 | PTA_NO_SAHF},
3453 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3454 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3455 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3456 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3457 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3458 | PTA_SSE2 | PTA_NO_SAHF},
3459 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3460 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3461 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3462 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3463 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3464 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3465 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3466 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3467 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3468 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3469 | PTA_XOP | PTA_LWP},
3470 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3471 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3472 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
3473 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3474 0 /* flags are only used for -march switch. */ },
3475 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3476 PTA_64BIT /* flags are only used for -march switch. */ },
3479 int const pta_size = ARRAY_SIZE (processor_alias_table);
3481 /* Set up prefix/suffix so the error messages refer to either the command
3482 line argument, or the attribute(target). */
3483 if (main_args_p)
3485 prefix = "-m";
3486 suffix = "";
3487 sw = "switch";
3489 else
3491 prefix = "option(\"";
3492 suffix = "\")";
3493 sw = "attribute";
3496 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3497 SUBTARGET_OVERRIDE_OPTIONS;
3498 #endif
3500 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3501 SUBSUBTARGET_OVERRIDE_OPTIONS;
3502 #endif
3504 /* On Darwin, -fPIC is the default for x86_64. */
3505 if (TARGET_MACHO && TARGET_64BIT)
3506 flag_pic = 2;
3508 /* Need to check -mtune=generic first. */
3509 if (ix86_tune_string)
3511 if (!strcmp (ix86_tune_string, "generic")
3512 || !strcmp (ix86_tune_string, "i686")
3513 /* As special support for cross compilers we read -mtune=native
3514 as -mtune=generic. With native compilers we won't see the
3515 -mtune=native, as it was changed by the driver. */
3516 || !strcmp (ix86_tune_string, "native"))
3518 if (TARGET_64BIT)
3519 ix86_tune_string = "generic64";
3520 else
3521 ix86_tune_string = "generic32";
3523 /* If this call is for setting the option attribute, allow the
3524 generic32/generic64 that was previously set. */
3525 else if (!main_args_p
3526 && (!strcmp (ix86_tune_string, "generic32")
3527 || !strcmp (ix86_tune_string, "generic64")))
3529 else if (!strncmp (ix86_tune_string, "generic", 7))
3530 error ("bad value (%s) for %stune=%s %s",
3531 ix86_tune_string, prefix, suffix, sw);
3532 else if (!strcmp (ix86_tune_string, "x86-64"))
3533 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3534 "%stune=k8%s or %stune=generic%s instead as appropriate",
3535 prefix, suffix, prefix, suffix, prefix, suffix);
3537 else
3539 if (ix86_arch_string)
3540 ix86_tune_string = ix86_arch_string;
3541 if (!ix86_tune_string)
3543 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3544 ix86_tune_defaulted = 1;
3547 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3548 need to use a sensible tune option. */
3549 if (!strcmp (ix86_tune_string, "generic")
3550 || !strcmp (ix86_tune_string, "x86-64")
3551 || !strcmp (ix86_tune_string, "i686"))
3553 if (TARGET_64BIT)
3554 ix86_tune_string = "generic64";
3555 else
3556 ix86_tune_string = "generic32";
3560 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3562 /* rep; movq isn't available in 32-bit code. */
3563 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3564 ix86_stringop_alg = no_stringop;
3567 if (!ix86_arch_string)
3568 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3569 else
3570 ix86_arch_specified = 1;
3572 if (!global_options_set.x_ix86_abi)
3573 ix86_abi = DEFAULT_ABI;
3575 if (global_options_set.x_ix86_cmodel)
3577 switch (ix86_cmodel)
3579 case CM_SMALL:
3580 case CM_SMALL_PIC:
3581 if (flag_pic)
3582 ix86_cmodel = CM_SMALL_PIC;
3583 if (!TARGET_64BIT)
3584 error ("code model %qs not supported in the %s bit mode",
3585 "small", "32");
3586 break;
3588 case CM_MEDIUM:
3589 case CM_MEDIUM_PIC:
3590 if (flag_pic)
3591 ix86_cmodel = CM_MEDIUM_PIC;
3592 if (!TARGET_64BIT)
3593 error ("code model %qs not supported in the %s bit mode",
3594 "medium", "32");
3595 break;
3597 case CM_LARGE:
3598 case CM_LARGE_PIC:
3599 if (flag_pic)
3600 ix86_cmodel = CM_LARGE_PIC;
3601 if (!TARGET_64BIT)
3602 error ("code model %qs not supported in the %s bit mode",
3603 "large", "32");
3604 break;
3606 case CM_32:
3607 if (flag_pic)
3608 error ("code model %s does not support PIC mode", "32");
3609 if (TARGET_64BIT)
3610 error ("code model %qs not supported in the %s bit mode",
3611 "32", "64");
3612 break;
3614 case CM_KERNEL:
3615 if (flag_pic)
3617 error ("code model %s does not support PIC mode", "kernel");
3618 ix86_cmodel = CM_32;
3620 if (!TARGET_64BIT)
3621 error ("code model %qs not supported in the %s bit mode",
3622 "kernel", "32");
3623 break;
3625 default:
3626 gcc_unreachable ();
3629 else
3631 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3632 use of rip-relative addressing. This eliminates fixups that
3633 would otherwise be needed if this object is to be placed in a
3634 DLL, and is essentially just as efficient as direct addressing. */
3635 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3636 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3637 else if (TARGET_64BIT)
3638 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3639 else
3640 ix86_cmodel = CM_32;
3642 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3644 error ("-masm=intel not supported in this configuration");
3645 ix86_asm_dialect = ASM_ATT;
3647 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3648 sorry ("%i-bit mode not compiled in",
3649 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3651 for (i = 0; i < pta_size; i++)
3652 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3654 ix86_schedule = processor_alias_table[i].schedule;
3655 ix86_arch = processor_alias_table[i].processor;
3656 /* Default cpu tuning to the architecture. */
3657 ix86_tune = ix86_arch;
3659 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3660 error ("CPU you selected does not support x86-64 "
3661 "instruction set");
3663 if (processor_alias_table[i].flags & PTA_MMX
3664 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3665 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3666 if (processor_alias_table[i].flags & PTA_3DNOW
3667 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3668 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3669 if (processor_alias_table[i].flags & PTA_3DNOW_A
3670 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3671 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3672 if (processor_alias_table[i].flags & PTA_SSE
3673 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3674 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3675 if (processor_alias_table[i].flags & PTA_SSE2
3676 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3677 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3678 if (processor_alias_table[i].flags & PTA_SSE3
3679 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3680 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3681 if (processor_alias_table[i].flags & PTA_SSSE3
3682 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3683 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3684 if (processor_alias_table[i].flags & PTA_SSE4_1
3685 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3686 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3687 if (processor_alias_table[i].flags & PTA_SSE4_2
3688 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3689 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3690 if (processor_alias_table[i].flags & PTA_AVX
3691 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3692 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3693 if (processor_alias_table[i].flags & PTA_FMA
3694 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3695 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3696 if (processor_alias_table[i].flags & PTA_SSE4A
3697 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3698 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3699 if (processor_alias_table[i].flags & PTA_FMA4
3700 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3701 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3702 if (processor_alias_table[i].flags & PTA_XOP
3703 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3704 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3705 if (processor_alias_table[i].flags & PTA_LWP
3706 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3707 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3708 if (processor_alias_table[i].flags & PTA_ABM
3709 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3710 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3711 if (processor_alias_table[i].flags & PTA_BMI
3712 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3713 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3714 if (processor_alias_table[i].flags & PTA_TBM
3715 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3716 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3717 if (processor_alias_table[i].flags & PTA_CX16
3718 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3719 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3720 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3721 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3722 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
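      /* CPUs with ABM also provide the popcnt instruction, which is why
	 either PTA_POPCNT or PTA_ABM enables OPTION_MASK_ISA_POPCNT here.  */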
3723 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3724 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3725 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3726 if (processor_alias_table[i].flags & PTA_MOVBE
3727 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3728 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3729 if (processor_alias_table[i].flags & PTA_AES
3730 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3731 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3732 if (processor_alias_table[i].flags & PTA_PCLMUL
3733 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3734 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3735 if (processor_alias_table[i].flags & PTA_FSGSBASE
3736 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3737 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3738 if (processor_alias_table[i].flags & PTA_RDRND
3739 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3740 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3741 if (processor_alias_table[i].flags & PTA_F16C
3742 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3743 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3744 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3745 x86_prefetch_sse = true;
3747 break;
3750 if (!strcmp (ix86_arch_string, "generic"))
3751 error ("generic CPU can be used only for %stune=%s %s",
3752 prefix, suffix, sw);
3753 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3754 error ("bad value (%s) for %sarch=%s %s",
3755 ix86_arch_string, prefix, suffix, sw);
3757 ix86_arch_mask = 1u << ix86_arch;
3758 for (i = 0; i < X86_ARCH_LAST; ++i)
3759 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
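  /* Each entry of initial_ix86_arch_features is a bitmask over processor
     types; testing the bit selected by ix86_arch_mask (1u << ix86_arch)
     tells whether that feature applies to the chosen -march CPU.  */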
3761 for (i = 0; i < pta_size; i++)
3762 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3764 ix86_schedule = processor_alias_table[i].schedule;
3765 ix86_tune = processor_alias_table[i].processor;
3766 if (TARGET_64BIT)
3768 if (!(processor_alias_table[i].flags & PTA_64BIT))
3770 if (ix86_tune_defaulted)
3772 ix86_tune_string = "x86-64";
3773 for (i = 0; i < pta_size; i++)
3774 if (! strcmp (ix86_tune_string,
3775 processor_alias_table[i].name))
3776 break;
3777 ix86_schedule = processor_alias_table[i].schedule;
3778 ix86_tune = processor_alias_table[i].processor;
3780 else
3781 error ("CPU you selected does not support x86-64 "
3782 "instruction set");
3785 else
3787 /* Adjust tuning when compiling for 32-bit ABI. */
3788 switch (ix86_tune)
3790 case PROCESSOR_GENERIC64:
3791 ix86_tune = PROCESSOR_GENERIC32;
3792 ix86_schedule = CPU_PENTIUMPRO;
3793 break;
3795 case PROCESSOR_CORE2_64:
3796 ix86_tune = PROCESSOR_CORE2_32;
3797 break;
3799 case PROCESSOR_COREI7_64:
3800 ix86_tune = PROCESSOR_COREI7_32;
3801 break;
3803 default:
3804 break;
3807 /* Intel CPUs have always interpreted SSE prefetch instructions as
3808 NOPs; so, we can enable SSE prefetch instructions even when
3809 -mtune (rather than -march) points us to a processor that has them.
3810 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3811 higher processors. */
3812 if (TARGET_CMOVE
3813 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3814 x86_prefetch_sse = true;
3815 break;
3818 if (ix86_tune_specified && i == pta_size)
3819 error ("bad value (%s) for %stune=%s %s",
3820 ix86_tune_string, prefix, suffix, sw);
3822 ix86_tune_mask = 1u << ix86_tune;
3823 for (i = 0; i < X86_TUNE_LAST; ++i)
3824 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3826 #ifndef USE_IX86_FRAME_POINTER
3827 #define USE_IX86_FRAME_POINTER 0
3828 #endif
3830 #ifndef USE_X86_64_FRAME_POINTER
3831 #define USE_X86_64_FRAME_POINTER 0
3832 #endif
3834 /* Set the default values for switches whose default depends on TARGET_64BIT
3835 in case they weren't overwritten by command line options. */
3836 if (TARGET_64BIT)
3838 if (optimize > 1 && !global_options_set.x_flag_zee)
3839 flag_zee = 1;
3840 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3841 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3842 if (flag_asynchronous_unwind_tables == 2)
3843 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3844 if (flag_pcc_struct_return == 2)
3845 flag_pcc_struct_return = 0;
3847 else
3849 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3850 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3851 if (flag_asynchronous_unwind_tables == 2)
3852 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3853 if (flag_pcc_struct_return == 2)
3854 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3857 if (optimize_size)
3858 ix86_cost = &ix86_size_cost;
3859 else
3860 ix86_cost = processor_target_table[ix86_tune].cost;
3862 /* Arrange to set up i386_stack_locals for all functions. */
3863 init_machine_status = ix86_init_machine_status;
3865 /* Validate -mregparm= value. */
3866 if (global_options_set.x_ix86_regparm)
3868 if (TARGET_64BIT)
3869 warning (0, "-mregparm is ignored in 64-bit mode");
3870 if (ix86_regparm > REGPARM_MAX)
3872 error ("-mregparm=%d is not between 0 and %d",
3873 ix86_regparm, REGPARM_MAX);
3874 ix86_regparm = 0;
3877 if (TARGET_64BIT)
3878 ix86_regparm = REGPARM_MAX;
3880 /* Default align_* from the processor table. */
3881 if (align_loops == 0)
3883 align_loops = processor_target_table[ix86_tune].align_loop;
3884 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3886 if (align_jumps == 0)
3888 align_jumps = processor_target_table[ix86_tune].align_jump;
3889 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3891 if (align_functions == 0)
3893 align_functions = processor_target_table[ix86_tune].align_func;
3896 /* Provide default for -mbranch-cost= value. */
3897 if (!global_options_set.x_ix86_branch_cost)
3898 ix86_branch_cost = ix86_cost->branch_cost;
3900 if (TARGET_64BIT)
3902 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3904 /* Enable the SSE and MMX builtins by default. Do allow the user to
3905 explicitly disable any of these. In particular, disabling SSE and
3906 MMX for kernel code is extremely useful. */
3907 if (!ix86_arch_specified)
3908 ix86_isa_flags
3909 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3910 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3912 if (TARGET_RTD)
3913 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3915 else
3917 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3919 if (!ix86_arch_specified)
3920 ix86_isa_flags
3921 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3923 /* The i386 ABI does not specify a red zone. It still makes sense to use
3924 one when the programmer takes care to keep the stack from being destroyed. */
3925 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3926 target_flags |= MASK_NO_RED_ZONE;
3929 /* Keep nonleaf frame pointers. */
3930 if (flag_omit_frame_pointer)
3931 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3932 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3933 flag_omit_frame_pointer = 1;
3935 /* If we're doing fast math, we don't care about comparison order
3936 wrt NaNs. This lets us use a shorter comparison sequence. */
3937 if (flag_finite_math_only)
3938 target_flags &= ~MASK_IEEE_FP;
3940 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3941 since the insns won't need emulation. */
3942 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3943 target_flags &= ~MASK_NO_FANCY_MATH_387;
3945 /* Likewise, if the target doesn't have a 387, or we've specified
3946 software floating point, don't use 387 inline intrinsics. */
3947 if (!TARGET_80387)
3948 target_flags |= MASK_NO_FANCY_MATH_387;
3950 /* Turn on MMX builtins for -msse. */
3951 if (TARGET_SSE)
3953 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3954 x86_prefetch_sse = true;
3957 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3958 if (TARGET_SSE4_2 || TARGET_ABM)
3959 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3961 /* Validate -mpreferred-stack-boundary= value or default it to
3962 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3963 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3964 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3966 int min = (TARGET_64BIT ? 4 : 2);
3967 int max = (TARGET_SEH ? 4 : 12);
3969 if (ix86_preferred_stack_boundary_arg < min
3970 || ix86_preferred_stack_boundary_arg > max)
3972 if (min == max)
3973 error ("-mpreferred-stack-boundary is not supported "
3974 "for this target");
3975 else
3976 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3977 ix86_preferred_stack_boundary_arg, min, max);
3979 else
3980 ix86_preferred_stack_boundary
3981 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
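      /* The argument is a power of two in bytes; e.g. with
	 -mpreferred-stack-boundary=4 this yields (1 << 4) * 8 = 128 bits,
	 i.e. a 16-byte boundary.  */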
3984 /* Set the default value for -mstackrealign. */
3985 if (ix86_force_align_arg_pointer == -1)
3986 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3988 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3990 /* Validate -mincoming-stack-boundary= value or default it to
3991 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3992 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3993 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3995 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3996 || ix86_incoming_stack_boundary_arg > 12)
3997 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3998 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3999 else
4001 ix86_user_incoming_stack_boundary
4002 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4003 ix86_incoming_stack_boundary
4004 = ix86_user_incoming_stack_boundary;
4008 /* Accept -msseregparm only if at least SSE support is enabled. */
4009 if (TARGET_SSEREGPARM
4010 && ! TARGET_SSE)
4011 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4013 if (global_options_set.x_ix86_fpmath)
4015 if (ix86_fpmath & FPMATH_SSE)
4017 if (!TARGET_SSE)
4019 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4020 ix86_fpmath = FPMATH_387;
4022 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
4024 warning (0, "387 instruction set disabled, using SSE arithmetics");
4025 ix86_fpmath = FPMATH_SSE;
4029 else
4030 ix86_fpmath = TARGET_FPMATH_DEFAULT;
4032 /* If the i387 is disabled, then do not return values in it. */
4033 if (!TARGET_80387)
4034 target_flags &= ~MASK_FLOAT_RETURNS;
4036 /* Use external vectorized library in vectorizing intrinsics. */
4037 if (global_options_set.x_ix86_veclibabi_type)
4038 switch (ix86_veclibabi_type)
4040 case ix86_veclibabi_type_svml:
4041 ix86_veclib_handler = ix86_veclibabi_svml;
4042 break;
4044 case ix86_veclibabi_type_acml:
4045 ix86_veclib_handler = ix86_veclibabi_acml;
4046 break;
4048 default:
4049 gcc_unreachable ();
4052 if ((!USE_IX86_FRAME_POINTER
4053 || (x86_accumulate_outgoing_args & ix86_tune_mask))
4054 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4055 && !optimize_size)
4056 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4058 /* ??? Unwind info is not correct around the CFG unless either a frame
4059 pointer is present or M_A_O_A is set. Fixing this requires rewriting
4060 unwind info generation to be aware of the CFG and propagating states
4061 around edges. */
4062 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
4063 || flag_exceptions || flag_non_call_exceptions)
4064 && flag_omit_frame_pointer
4065 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4067 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4068 warning (0, "unwind tables currently require either a frame pointer "
4069 "or %saccumulate-outgoing-args%s for correctness",
4070 prefix, suffix);
4071 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4074 /* If stack probes are required, the space used for large function
4075 arguments on the stack must also be probed, so enable
4076 -maccumulate-outgoing-args so this happens in the prologue. */
4077 if (TARGET_STACK_PROBE
4078 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4080 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4081 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4082 "for correctness", prefix, suffix);
4083 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4086 /* For sane SSE instruction set generation we need the fcomi instruction.
4087 It is safe to enable all CMOVE instructions. Also, the RDRAND intrinsic
4088 expands to a sequence that includes a conditional move. */
4089 if (TARGET_SSE || TARGET_RDRND)
4090 TARGET_CMOVE = 1;
4092 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4094 char *p;
4095 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4096 p = strchr (internal_label_prefix, 'X');
4097 internal_label_prefix_len = p - internal_label_prefix;
4098 *p = '\0';
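  /* E.g. (illustrative) if the generated label is "*.LX0", everything before
     the 'X' ("*.L") becomes the internal label prefix; the exact form is
     target-specific.  */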
4101 /* When no scheduling description is available, disable the scheduler
4102 passes so they neither slow down compilation nor make x87 code slower. */
4103 if (!TARGET_SCHEDULE)
4104 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
4106 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4107 ix86_cost->simultaneous_prefetches,
4108 global_options.x_param_values,
4109 global_options_set.x_param_values);
4110 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
4111 global_options.x_param_values,
4112 global_options_set.x_param_values);
4113 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
4114 global_options.x_param_values,
4115 global_options_set.x_param_values);
4116 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
4117 global_options.x_param_values,
4118 global_options_set.x_param_values);
4120 /* Enable software prefetching at -O3 for CPUs where prefetching is beneficial. */
4121 if (flag_prefetch_loop_arrays < 0
4122 && HAVE_prefetch
4123 && optimize >= 3
4124 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4125 flag_prefetch_loop_arrays = 1;
4127 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4128 can be optimized to ap = __builtin_next_arg (0). */
4129 if (!TARGET_64BIT && !flag_split_stack)
4130 targetm.expand_builtin_va_start = NULL;
4132 if (TARGET_64BIT)
4134 ix86_gen_leave = gen_leave_rex64;
4135 ix86_gen_add3 = gen_adddi3;
4136 ix86_gen_sub3 = gen_subdi3;
4137 ix86_gen_sub3_carry = gen_subdi3_carry;
4138 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4139 ix86_gen_monitor = gen_sse3_monitor64;
4140 ix86_gen_andsp = gen_anddi3;
4141 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4142 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4143 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4145 else
4147 ix86_gen_leave = gen_leave;
4148 ix86_gen_add3 = gen_addsi3;
4149 ix86_gen_sub3 = gen_subsi3;
4150 ix86_gen_sub3_carry = gen_subsi3_carry;
4151 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4152 ix86_gen_monitor = gen_sse3_monitor;
4153 ix86_gen_andsp = gen_andsi3;
4154 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4155 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4156 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4159 #ifdef USE_IX86_CLD
4160 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4161 if (!TARGET_64BIT)
4162 target_flags |= MASK_CLD & ~target_flags_explicit;
4163 #endif
4165 if (!TARGET_64BIT && flag_pic)
4167 if (flag_fentry > 0)
4168 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4169 "with -fpic");
4170 flag_fentry = 0;
4172 else if (TARGET_SEH)
4174 if (flag_fentry == 0)
4175 sorry ("-mno-fentry isn%'t compatible with SEH");
4176 flag_fentry = 1;
4178 else if (flag_fentry < 0)
4180 #if defined(PROFILE_BEFORE_PROLOGUE)
4181 flag_fentry = 1;
4182 #else
4183 flag_fentry = 0;
4184 #endif
4187 if (TARGET_AVX)
4189 /* When not optimizing for size, enable the vzeroupper optimization for
4190 TARGET_AVX with -fexpensive-optimizations, and split 32-byte
4191 AVX unaligned loads/stores. */
4192 if (!optimize_size)
4194 if (flag_expensive_optimizations
4195 && !(target_flags_explicit & MASK_VZEROUPPER))
4196 target_flags |= MASK_VZEROUPPER;
4197 if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4198 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4199 if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4200 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4203 else
4205 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
4206 target_flags &= ~MASK_VZEROUPPER;
4209 /* Save the initial options in case the user does function specific
4210 options. */
4211 if (main_args_p)
4212 target_option_default_node = target_option_current_node
4213 = build_target_option_node ();
4216 /* Return TRUE if VAL is passed in register with 256bit AVX modes. */
4218 static bool
4219 function_pass_avx256_p (const_rtx val)
4221 if (!val)
4222 return false;
4224 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
4225 return true;
4227 if (GET_CODE (val) == PARALLEL)
4229 int i;
4230 rtx r;
4232 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
4234 r = XVECEXP (val, 0, i);
4235 if (GET_CODE (r) == EXPR_LIST
4236 && XEXP (r, 0)
4237 && REG_P (XEXP (r, 0))
4238 && (GET_MODE (XEXP (r, 0)) == OImode
4239 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
4240 return true;
4244 return false;
4247 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4249 static void
4250 ix86_option_override (void)
4252 ix86_option_override_internal (true);
4255 /* Update register usage after having seen the compiler flags. */
4257 static void
4258 ix86_conditional_register_usage (void)
4260 int i;
4261 unsigned int j;
4263 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4265 if (fixed_regs[i] > 1)
4266 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
4267 if (call_used_regs[i] > 1)
4268 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
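      /* Table values greater than 1 mean "conditionally fixed/call-used":
	 as the comparisons above show, 2 applies only in 32-bit mode and
	 3 only in 64-bit mode.  */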
4271 /* The PIC register, if it exists, is fixed. */
4272 j = PIC_OFFSET_TABLE_REGNUM;
4273 if (j != INVALID_REGNUM)
4274 fixed_regs[j] = call_used_regs[j] = 1;
4276 /* The 64-bit MS_ABI changes the set of call-used registers. */
4277 if (TARGET_64BIT_MS_ABI)
4279 call_used_regs[SI_REG] = 0;
4280 call_used_regs[DI_REG] = 0;
4281 call_used_regs[XMM6_REG] = 0;
4282 call_used_regs[XMM7_REG] = 0;
4283 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4284 call_used_regs[i] = 0;
4287 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
4288 other call-clobbered regs for 64-bit. */
4289 if (TARGET_64BIT)
4291 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4293 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4294 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4295 && call_used_regs[i])
4296 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4299 /* If MMX is disabled, squash the registers. */
4300 if (! TARGET_MMX)
4301 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4302 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4303 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4305 /* If SSE is disabled, squash the registers. */
4306 if (! TARGET_SSE)
4307 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4308 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4309 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4311 /* If the FPU is disabled, squash the registers. */
4312 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4313 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4314 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4315 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4317 /* If 32-bit, squash the 64-bit registers. */
4318 if (! TARGET_64BIT)
4320 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4321 reg_names[i] = "";
4322 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4323 reg_names[i] = "";
4328 /* Save the current options */
4330 static void
4331 ix86_function_specific_save (struct cl_target_option *ptr)
4333 ptr->arch = ix86_arch;
4334 ptr->schedule = ix86_schedule;
4335 ptr->tune = ix86_tune;
4336 ptr->branch_cost = ix86_branch_cost;
4337 ptr->tune_defaulted = ix86_tune_defaulted;
4338 ptr->arch_specified = ix86_arch_specified;
4339 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4340 ptr->ix86_target_flags_explicit = target_flags_explicit;
4342 /* The fields are char but the variables are not; make sure the
4343 values fit in the fields. */
4344 gcc_assert (ptr->arch == ix86_arch);
4345 gcc_assert (ptr->schedule == ix86_schedule);
4346 gcc_assert (ptr->tune == ix86_tune);
4347 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4350 /* Restore the current options */
4352 static void
4353 ix86_function_specific_restore (struct cl_target_option *ptr)
4355 enum processor_type old_tune = ix86_tune;
4356 enum processor_type old_arch = ix86_arch;
4357 unsigned int ix86_arch_mask, ix86_tune_mask;
4358 int i;
4360 ix86_arch = (enum processor_type) ptr->arch;
4361 ix86_schedule = (enum attr_cpu) ptr->schedule;
4362 ix86_tune = (enum processor_type) ptr->tune;
4363 ix86_branch_cost = ptr->branch_cost;
4364 ix86_tune_defaulted = ptr->tune_defaulted;
4365 ix86_arch_specified = ptr->arch_specified;
4366 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4367 target_flags_explicit = ptr->ix86_target_flags_explicit;
4369 /* Recreate the arch feature tests if the arch changed */
4370 if (old_arch != ix86_arch)
4372 ix86_arch_mask = 1u << ix86_arch;
4373 for (i = 0; i < X86_ARCH_LAST; ++i)
4374 ix86_arch_features[i]
4375 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4378 /* Recreate the tune optimization tests */
4379 if (old_tune != ix86_tune)
4381 ix86_tune_mask = 1u << ix86_tune;
4382 for (i = 0; i < X86_TUNE_LAST; ++i)
4383 ix86_tune_features[i]
4384 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4388 /* Print the current options */
4390 static void
4391 ix86_function_specific_print (FILE *file, int indent,
4392 struct cl_target_option *ptr)
4394 char *target_string
4395 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4396 NULL, NULL, ptr->x_ix86_fpmath, false);
4398 fprintf (file, "%*sarch = %d (%s)\n",
4399 indent, "",
4400 ptr->arch,
4401 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4402 ? cpu_names[ptr->arch]
4403 : "<unknown>"));
4405 fprintf (file, "%*stune = %d (%s)\n",
4406 indent, "",
4407 ptr->tune,
4408 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4409 ? cpu_names[ptr->tune]
4410 : "<unknown>"));
4412 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4414 if (target_string)
4416 fprintf (file, "%*s%s\n", indent, "", target_string);
4417 free (target_string);
4422 /* Inner function to process attribute((target(...))): take one argument and
4423 set the current options from it. If the argument is a list, recursively
4424 process each element of the list. */
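/* The strings handled here are the comma-separated arguments of
   attribute((target("..."))), e.g. (illustrative)
   __attribute__((target("no-sse3,arch=core2,fpmath=sse"))).  */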
4426 static bool
4427 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4428 struct gcc_options *enum_opts_set)
4430 char *next_optstr;
4431 bool ret = true;
4433 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4434 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4435 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4436 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4437 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4439 enum ix86_opt_type
4441 ix86_opt_unknown,
4442 ix86_opt_yes,
4443 ix86_opt_no,
4444 ix86_opt_str,
4445 ix86_opt_enum,
4446 ix86_opt_isa
4449 static const struct
4451 const char *string;
4452 size_t len;
4453 enum ix86_opt_type type;
4454 int opt;
4455 int mask;
4456 } attrs[] = {
4457 /* isa options */
4458 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4459 IX86_ATTR_ISA ("abm", OPT_mabm),
4460 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4461 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4462 IX86_ATTR_ISA ("aes", OPT_maes),
4463 IX86_ATTR_ISA ("avx", OPT_mavx),
4464 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4465 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4466 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4467 IX86_ATTR_ISA ("sse", OPT_msse),
4468 IX86_ATTR_ISA ("sse2", OPT_msse2),
4469 IX86_ATTR_ISA ("sse3", OPT_msse3),
4470 IX86_ATTR_ISA ("sse4", OPT_msse4),
4471 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4472 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4473 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4474 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4475 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4476 IX86_ATTR_ISA ("xop", OPT_mxop),
4477 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4478 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4479 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4480 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4482 /* enum options */
4483 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4485 /* string options */
4486 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4487 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4489 /* flag options */
4490 IX86_ATTR_YES ("cld",
4491 OPT_mcld,
4492 MASK_CLD),
4494 IX86_ATTR_NO ("fancy-math-387",
4495 OPT_mfancy_math_387,
4496 MASK_NO_FANCY_MATH_387),
4498 IX86_ATTR_YES ("ieee-fp",
4499 OPT_mieee_fp,
4500 MASK_IEEE_FP),
4502 IX86_ATTR_YES ("inline-all-stringops",
4503 OPT_minline_all_stringops,
4504 MASK_INLINE_ALL_STRINGOPS),
4506 IX86_ATTR_YES ("inline-stringops-dynamically",
4507 OPT_minline_stringops_dynamically,
4508 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4510 IX86_ATTR_NO ("align-stringops",
4511 OPT_mno_align_stringops,
4512 MASK_NO_ALIGN_STRINGOPS),
4514 IX86_ATTR_YES ("recip",
4515 OPT_mrecip,
4516 MASK_RECIP),
4520 /* If this is a list, recurse to get the options. */
4521 if (TREE_CODE (args) == TREE_LIST)
4523 bool ret = true;
4525 for (; args; args = TREE_CHAIN (args))
4526 if (TREE_VALUE (args)
4527 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4528 p_strings, enum_opts_set))
4529 ret = false;
4531 return ret;
4534 else if (TREE_CODE (args) != STRING_CST)
4535 gcc_unreachable ();
4537 /* Handle multiple arguments separated by commas. */
4538 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4540 while (next_optstr && *next_optstr != '\0')
4542 char *p = next_optstr;
4543 char *orig_p = p;
4544 char *comma = strchr (next_optstr, ',');
4545 const char *opt_string;
4546 size_t len, opt_len;
4547 int opt;
4548 bool opt_set_p;
4549 char ch;
4550 unsigned i;
4551 enum ix86_opt_type type = ix86_opt_unknown;
4552 int mask = 0;
4554 if (comma)
4556 *comma = '\0';
4557 len = comma - next_optstr;
4558 next_optstr = comma + 1;
4560 else
4562 len = strlen (p);
4563 next_optstr = NULL;
4566 /* Recognize no-xxx. */
4567 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4569 opt_set_p = false;
4570 p += 3;
4571 len -= 3;
4573 else
4574 opt_set_p = true;
4576 /* Find the option. */
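      /* For ix86_opt_str and ix86_opt_enum entries the table string is a
	 prefix ("arch=", "fpmath=") that must be followed by a value, hence
	 the len > opt_len test below; all other entries must match exactly.  */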
4577 ch = *p;
4578 opt = N_OPTS;
4579 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4581 type = attrs[i].type;
4582 opt_len = attrs[i].len;
4583 if (ch == attrs[i].string[0]
4584 && ((type != ix86_opt_str && type != ix86_opt_enum)
4585 ? len == opt_len
4586 : len > opt_len)
4587 && memcmp (p, attrs[i].string, opt_len) == 0)
4589 opt = attrs[i].opt;
4590 mask = attrs[i].mask;
4591 opt_string = attrs[i].string;
4592 break;
4596 /* Process the option. */
4597 if (opt == N_OPTS)
4599 error ("attribute(target(\"%s\")) is unknown", orig_p);
4600 ret = false;
4603 else if (type == ix86_opt_isa)
4605 struct cl_decoded_option decoded;
4607 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4608 ix86_handle_option (&global_options, &global_options_set,
4609 &decoded, input_location);
4612 else if (type == ix86_opt_yes || type == ix86_opt_no)
4614 if (type == ix86_opt_no)
4615 opt_set_p = !opt_set_p;
4617 if (opt_set_p)
4618 target_flags |= mask;
4619 else
4620 target_flags &= ~mask;
4623 else if (type == ix86_opt_str)
4625 if (p_strings[opt])
4627 error ("option(\"%s\") was already specified", opt_string);
4628 ret = false;
4630 else
4631 p_strings[opt] = xstrdup (p + opt_len);
4634 else if (type == ix86_opt_enum)
4636 bool arg_ok;
4637 int value;
4639 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4640 if (arg_ok)
4641 set_option (&global_options, enum_opts_set, opt, value,
4642 p + opt_len, DK_UNSPECIFIED, input_location,
4643 global_dc);
4644 else
4646 error ("attribute(target(\"%s\")) is unknown", orig_p);
4647 ret = false;
4651 else
4652 gcc_unreachable ();
4655 return ret;
4658 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4660 tree
4661 ix86_valid_target_attribute_tree (tree args)
4663 const char *orig_arch_string = ix86_arch_string;
4664 const char *orig_tune_string = ix86_tune_string;
4665 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4666 int orig_tune_defaulted = ix86_tune_defaulted;
4667 int orig_arch_specified = ix86_arch_specified;
4668 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4669 tree t = NULL_TREE;
4670 int i;
4671 struct cl_target_option *def
4672 = TREE_TARGET_OPTION (target_option_default_node);
4673 struct gcc_options enum_opts_set;
4675 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4677 /* Process each of the options on the chain. */
4678 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4679 &enum_opts_set))
4680 return NULL_TREE;
4682 /* If the changed options are different from the default, rerun
4683 ix86_option_override_internal, and then save the options away.
4684 The string options are attribute options, and will be undone
4685 when we copy the save structure. */
4686 if (ix86_isa_flags != def->x_ix86_isa_flags
4687 || target_flags != def->x_target_flags
4688 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4689 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4690 || enum_opts_set.x_ix86_fpmath)
4692 /* If we are using the default tune= or arch=, undo the string assigned,
4693 and use the default. */
4694 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4695 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4696 else if (!orig_arch_specified)
4697 ix86_arch_string = NULL;
4699 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4700 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4701 else if (orig_tune_defaulted)
4702 ix86_tune_string = NULL;
4704 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4705 if (enum_opts_set.x_ix86_fpmath)
4706 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4707 else if (!TARGET_64BIT && TARGET_SSE)
4709 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4710 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4713 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4714 ix86_option_override_internal (false);
4716 /* Add any builtin functions with the new isa if any. */
4717 ix86_add_new_builtins (ix86_isa_flags);
4719 /* Save the current options unless we are validating options for
4720 #pragma. */
4721 t = build_target_option_node ();
4723 ix86_arch_string = orig_arch_string;
4724 ix86_tune_string = orig_tune_string;
4725 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4727 /* Free up memory allocated to hold the strings */
4728 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4729 free (option_strings[i]);
4732 return t;
4735 /* Hook to validate attribute((target("string"))). */
4737 static bool
4738 ix86_valid_target_attribute_p (tree fndecl,
4739 tree ARG_UNUSED (name),
4740 tree args,
4741 int ARG_UNUSED (flags))
4743 struct cl_target_option cur_target;
4744 bool ret = true;
4745 tree old_optimize = build_optimization_node ();
4746 tree new_target, new_optimize;
4747 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4749 /* If the function changed the optimization levels as well as setting target
4750 options, start with the optimizations specified. */
4751 if (func_optimize && func_optimize != old_optimize)
4752 cl_optimization_restore (&global_options,
4753 TREE_OPTIMIZATION (func_optimize));
4755 /* The target attributes may also change some optimization flags, so update
4756 the optimization options if necessary. */
4757 cl_target_option_save (&cur_target, &global_options);
4758 new_target = ix86_valid_target_attribute_tree (args);
4759 new_optimize = build_optimization_node ();
4761 if (!new_target)
4762 ret = false;
4764 else if (fndecl)
4766 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4768 if (old_optimize != new_optimize)
4769 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4772 cl_target_option_restore (&global_options, &cur_target);
4774 if (old_optimize != new_optimize)
4775 cl_optimization_restore (&global_options,
4776 TREE_OPTIMIZATION (old_optimize));
4778 return ret;
4782 /* Hook to determine if one function can safely inline another. */
4784 static bool
4785 ix86_can_inline_p (tree caller, tree callee)
4787 bool ret = false;
4788 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4789 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4791 /* If callee has no option attributes, then it is ok to inline. */
4792 if (!callee_tree)
4793 ret = true;
4795 /* If caller has no option attributes, but callee does then it is not ok to
4796 inline. */
4797 else if (!caller_tree)
4798 ret = false;
4800 else
4802 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4803 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4805 /* The callee's isa options should be a subset of the caller's, i.e. an SSE4
4806 function can inline an SSE2 function but an SSE2 function can't inline
4807 an SSE4 function. */
4808 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4809 != callee_opts->x_ix86_isa_flags)
4810 ret = false;
4812 /* See if we have the same non-isa options. */
4813 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4814 ret = false;
4816 /* See if arch, tune, etc. are the same. */
4817 else if (caller_opts->arch != callee_opts->arch)
4818 ret = false;
4820 else if (caller_opts->tune != callee_opts->tune)
4821 ret = false;
4823 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4824 ret = false;
4826 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4827 ret = false;
4829 else
4830 ret = true;
4833 return ret;
4837 /* Remember the last target of ix86_set_current_function. */
4838 static GTY(()) tree ix86_previous_fndecl;
4840 /* Establish appropriate back-end context for processing the function
4841 FNDECL. The argument might be NULL to indicate processing at top
4842 level, outside of any function scope. */
4843 static void
4844 ix86_set_current_function (tree fndecl)
4846 /* Only change the context if the function changes. This hook is called
4847 several times in the course of compiling a function, and we don't want to
4848 slow things down too much or call target_reinit when it isn't safe. */
4849 if (fndecl && fndecl != ix86_previous_fndecl)
4851 tree old_tree = (ix86_previous_fndecl
4852 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4853 : NULL_TREE);
4855 tree new_tree = (fndecl
4856 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4857 : NULL_TREE);
4859 ix86_previous_fndecl = fndecl;
4860 if (old_tree == new_tree)
4863 else if (new_tree)
4865 cl_target_option_restore (&global_options,
4866 TREE_TARGET_OPTION (new_tree));
4867 target_reinit ();
4870 else if (old_tree)
4872 struct cl_target_option *def
4873 = TREE_TARGET_OPTION (target_option_current_node);
4875 cl_target_option_restore (&global_options, def);
4876 target_reinit ();
4882 /* Return true if this goes in large data/bss. */
4884 static bool
4885 ix86_in_large_data_p (tree exp)
4887 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4888 return false;
4890 /* Functions are never large data. */
4891 if (TREE_CODE (exp) == FUNCTION_DECL)
4892 return false;
4894 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4896 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4897 if (strcmp (section, ".ldata") == 0
4898 || strcmp (section, ".lbss") == 0)
4899 return true;
4900 return false;
4902 else
4904 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4906 /* If this is an incomplete type with size 0, then we can't put it
4907 in data because it might be too big when completed. */
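      /* Objects larger than ix86_section_threshold (controlled by
	 -mlarge-data-threshold) go into the large data/bss sections.  */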
4908 if (!size || size > ix86_section_threshold)
4909 return true;
4912 return false;
4915 /* Switch to the appropriate section for output of DECL.
4916 DECL is either a `VAR_DECL' node or a constant of some sort.
4917 RELOC indicates whether forming the initial value of DECL requires
4918 link-time relocations. */
4920 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4921 ATTRIBUTE_UNUSED;
4923 static section *
4924 x86_64_elf_select_section (tree decl, int reloc,
4925 unsigned HOST_WIDE_INT align)
4927 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4928 && ix86_in_large_data_p (decl))
4930 const char *sname = NULL;
4931 unsigned int flags = SECTION_WRITE;
4932 switch (categorize_decl_for_section (decl, reloc))
4934 case SECCAT_DATA:
4935 sname = ".ldata";
4936 break;
4937 case SECCAT_DATA_REL:
4938 sname = ".ldata.rel";
4939 break;
4940 case SECCAT_DATA_REL_LOCAL:
4941 sname = ".ldata.rel.local";
4942 break;
4943 case SECCAT_DATA_REL_RO:
4944 sname = ".ldata.rel.ro";
4945 break;
4946 case SECCAT_DATA_REL_RO_LOCAL:
4947 sname = ".ldata.rel.ro.local";
4948 break;
4949 case SECCAT_BSS:
4950 sname = ".lbss";
4951 flags |= SECTION_BSS;
4952 break;
4953 case SECCAT_RODATA:
4954 case SECCAT_RODATA_MERGE_STR:
4955 case SECCAT_RODATA_MERGE_STR_INIT:
4956 case SECCAT_RODATA_MERGE_CONST:
4957 sname = ".lrodata";
4958 flags = 0;
4959 break;
4960 case SECCAT_SRODATA:
4961 case SECCAT_SDATA:
4962 case SECCAT_SBSS:
4963 gcc_unreachable ();
4964 case SECCAT_TEXT:
4965 case SECCAT_TDATA:
4966 case SECCAT_TBSS:
4967 /* We don't split these for the medium model. Place them into
4968 the default sections and hope for the best. */
4969 break;
4971 if (sname)
4973 /* We might get called with string constants, but get_named_section
4974 doesn't like them as they are not DECLs. Also, we need to set
4975 flags in that case. */
4976 if (!DECL_P (decl))
4977 return get_section (sname, flags, NULL);
4978 return get_named_section (decl, sname, reloc);
4981 return default_elf_select_section (decl, reloc, align);
4984 /* Build up a unique section name, expressed as a
4985 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4986 RELOC indicates whether the initial value of EXP requires
4987 link-time relocations. */
4989 static void ATTRIBUTE_UNUSED
4990 x86_64_elf_unique_section (tree decl, int reloc)
4992 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4993 && ix86_in_large_data_p (decl))
4995 const char *prefix = NULL;
4996 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4997 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4999 switch (categorize_decl_for_section (decl, reloc))
5001 case SECCAT_DATA:
5002 case SECCAT_DATA_REL:
5003 case SECCAT_DATA_REL_LOCAL:
5004 case SECCAT_DATA_REL_RO:
5005 case SECCAT_DATA_REL_RO_LOCAL:
5006 prefix = one_only ? ".ld" : ".ldata";
5007 break;
5008 case SECCAT_BSS:
5009 prefix = one_only ? ".lb" : ".lbss";
5010 break;
5011 case SECCAT_RODATA:
5012 case SECCAT_RODATA_MERGE_STR:
5013 case SECCAT_RODATA_MERGE_STR_INIT:
5014 case SECCAT_RODATA_MERGE_CONST:
5015 prefix = one_only ? ".lr" : ".lrodata";
5016 break;
5017 case SECCAT_SRODATA:
5018 case SECCAT_SDATA:
5019 case SECCAT_SBSS:
5020 gcc_unreachable ();
5021 case SECCAT_TEXT:
5022 case SECCAT_TDATA:
5023 case SECCAT_TBSS:
5024 /* We don't split these for the medium model. Place them into
5025 the default sections and hope for the best. */
5026 break;
5028 if (prefix)
5030 const char *name, *linkonce;
5031 char *string;
5033 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5034 name = targetm.strip_name_encoding (name);
5036 /* If we're using one_only, then there needs to be a .gnu.linkonce
5037 prefix to the section name. */
5038 linkonce = one_only ? ".gnu.linkonce" : "";
5040 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
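	  /* E.g. (illustrative) a one_only variable "foo" categorized as BSS
	     ends up in section ".gnu.linkonce.lb.foo", while a plain large
	     data symbol ends up in ".ldata.foo".  */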
5042 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
5043 return;
5046 default_unique_section (decl, reloc);
5049 #ifdef COMMON_ASM_OP
5050 /* This says how to output assembler code to declare an
5051 uninitialized external linkage data object.
5053 For medium-model x86-64 we need to use the .largecomm directive for
5054 large objects. */
5055 void
5056 x86_elf_aligned_common (FILE *file,
5057 const char *name, unsigned HOST_WIDE_INT size,
5058 int align)
5060 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5061 && size > (unsigned int)ix86_section_threshold)
5062 fputs (".largecomm\t", file);
5063 else
5064 fputs (COMMON_ASM_OP, file);
5065 assemble_name (file, name);
5066 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5067 size, align / BITS_PER_UNIT);
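  /* The emitted directive looks like (illustrative)
	.largecomm	big_array,1048576,32
     or uses COMMON_ASM_OP (typically "\t.comm\t") for smaller objects;
     the final field is the alignment in bytes.  */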
5069 #endif
5071 /* Utility function for targets to use in implementing
5072 ASM_OUTPUT_ALIGNED_BSS. */
5074 void
5075 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5076 const char *name, unsigned HOST_WIDE_INT size,
5077 int align)
5079 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5080 && size > (unsigned int)ix86_section_threshold)
5081 switch_to_section (get_named_section (decl, ".lbss", 0));
5082 else
5083 switch_to_section (bss_section);
5084 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5085 #ifdef ASM_DECLARE_OBJECT_NAME
5086 last_assemble_variable_decl = decl;
5087 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5088 #else
5089 /* The standard thing is just to output a label for the object. */
5090 ASM_OUTPUT_LABEL (file, name);
5091 #endif /* ASM_DECLARE_OBJECT_NAME */
5092 ASM_OUTPUT_SKIP (file, size ? size : 1);
5095 static const struct default_options ix86_option_optimization_table[] =
5097 /* Turn off -fschedule-insns by default. It tends to make the
5098 problem with not enough registers even worse. */
5099 #ifdef INSN_SCHEDULING
5100 { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
5101 #endif
5103 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
5104 SUBTARGET_OPTIMIZATION_OPTIONS,
5105 #endif
5106 { OPT_LEVELS_NONE, 0, NULL, 0 }
5109 /* Implement TARGET_OPTION_INIT_STRUCT. */
5111 static void
5112 ix86_option_init_struct (struct gcc_options *opts)
5114 if (TARGET_MACHO)
5115 /* The Darwin libraries never set errno, so we might as well
5116 avoid calling them when that's the only reason we would. */
5117 opts->x_flag_errno_math = 0;
5119 opts->x_flag_pcc_struct_return = 2;
5120 opts->x_flag_asynchronous_unwind_tables = 2;
5121 opts->x_flag_vect_cost_model = 1;
5124 /* Decide whether we must probe the stack before any space allocation
5125 on this target. It's essentially TARGET_STACK_PROBE except when
5126 -fstack-check causes the stack to be already probed differently. */
5128 bool
5129 ix86_target_stack_probe (void)
5131 /* Do not probe the stack twice if static stack checking is enabled. */
5132 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5133 return false;
5135 return TARGET_STACK_PROBE;
5138 /* Decide whether we can make a sibling call to a function. DECL is the
5139 declaration of the function being targeted by the call and EXP is the
5140 CALL_EXPR representing the call. */
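/* Hedged illustration (not GCC source) of the 32-bit PIC restriction checked
   first below: with -m32 -fpic, a tail call such as

     extern int bar (int);
     int foo (int x) { return bar (x); }

   cannot become a sibcall when bar may bind outside the module, because the
   call goes through the PLT and the PLT stub expects %ebx to hold the GOT
   pointer; a jmp-style tail call would not keep %ebx live across it.  */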
5142 static bool
5143 ix86_function_ok_for_sibcall (tree decl, tree exp)
5145 tree type, decl_or_type;
5146 rtx a, b;
5148 /* If we are generating position-independent code, we cannot sibcall
5149 optimize any indirect call, or a direct call to a global function,
5150 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5151 if (!TARGET_MACHO
5152 && !TARGET_64BIT
5153 && flag_pic
5154 && (!decl || !targetm.binds_local_p (decl)))
5155 return false;
5157 /* If we need to align the outgoing stack, then sibcalling would
5158 unalign the stack, which may break the called function. */
5159 if (ix86_minimum_incoming_stack_boundary (true)
5160 < PREFERRED_STACK_BOUNDARY)
5161 return false;
5163 if (decl)
5165 decl_or_type = decl;
5166 type = TREE_TYPE (decl);
5168 else
5170 /* We're looking at the CALL_EXPR, we need the type of the function. */
5171 type = CALL_EXPR_FN (exp); /* pointer expression */
5172 type = TREE_TYPE (type); /* pointer type */
5173 type = TREE_TYPE (type); /* function type */
5174 decl_or_type = type;
5177 /* Check that the return value locations are the same. Like
5178 if we are returning floats on the 80387 register stack, we cannot
5179 make a sibcall from a function that doesn't return a float to a
5180 function that does or, conversely, from a function that does return
5181 a float to a function that doesn't; the necessary stack adjustment
5182 would not be executed. This is also the place we notice
5183 differences in the return value ABI. Note that it is ok for one
5184 of the functions to have void return type as long as the return
5185 value of the other is passed in a register. */
5186 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5187 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5188 cfun->decl, false);
5189 if (STACK_REG_P (a) || STACK_REG_P (b))
5191 if (!rtx_equal_p (a, b))
5192 return false;
5194 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5196 /* Disable sibcall if we need to generate vzeroupper after
5197 callee returns. */
5198 if (TARGET_VZEROUPPER
5199 && cfun->machine->callee_return_avx256_p
5200 && !cfun->machine->caller_return_avx256_p)
5201 return false;
5203 else if (!rtx_equal_p (a, b))
5204 return false;
5206 if (TARGET_64BIT)
5208 /* The SYSV ABI has more call-clobbered registers;
5209 disallow sibcalls from MS to SYSV. */
5210 if (cfun->machine->call_abi == MS_ABI
5211 && ix86_function_type_abi (type) == SYSV_ABI)
5212 return false;
5214 else
5216 /* If this call is indirect, we'll need to be able to use a
5217 call-clobbered register for the address of the target function.
5218 Make sure that all such registers are not used for passing
5219 parameters. Note that DLLIMPORT functions are indirect. */
5220 if (!decl
5221 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5223 if (ix86_function_regparm (type, NULL) >= 3)
5225 /* ??? Need to count the actual number of registers to be used,
5226 not the possible number of registers. Fix later. */
5227 return false;
5232 /* Otherwise okay. That also includes certain types of indirect calls. */
5233 return true;
5236 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5237 and "sseregparm" calling convention attributes;
5238 arguments as in struct attribute_spec.handler. */
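/* For orientation only (user-code sketch, not part of this file): the
   attributes validated below appear on function types like

     int __attribute__ ((fastcall)) f (int a, int b);         a, b in %ecx/%edx
     int __attribute__ ((regparm (3))) g (int a, int b, int c);
     int __attribute__ ((stdcall)) h (int a);                  callee pops args

   Combinations such as fastcall + regparm or stdcall + cdecl are rejected
   with the errors emitted by this handler.  */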
5240 static tree
5241 ix86_handle_cconv_attribute (tree *node, tree name,
5242 tree args,
5243 int flags ATTRIBUTE_UNUSED,
5244 bool *no_add_attrs)
5246 if (TREE_CODE (*node) != FUNCTION_TYPE
5247 && TREE_CODE (*node) != METHOD_TYPE
5248 && TREE_CODE (*node) != FIELD_DECL
5249 && TREE_CODE (*node) != TYPE_DECL)
5251 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5252 name);
5253 *no_add_attrs = true;
5254 return NULL_TREE;
5257 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5258 if (is_attribute_p ("regparm", name))
5260 tree cst;
5262 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5264 error ("fastcall and regparm attributes are not compatible");
5267 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5269 error ("regparam and thiscall attributes are not compatible");
5272 cst = TREE_VALUE (args);
5273 if (TREE_CODE (cst) != INTEGER_CST)
5275 warning (OPT_Wattributes,
5276 "%qE attribute requires an integer constant argument",
5277 name);
5278 *no_add_attrs = true;
5280 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5282 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5283 name, REGPARM_MAX);
5284 *no_add_attrs = true;
5287 return NULL_TREE;
5290 if (TARGET_64BIT)
5292 /* Do not warn when emulating the MS ABI. */
5293 if ((TREE_CODE (*node) != FUNCTION_TYPE
5294 && TREE_CODE (*node) != METHOD_TYPE)
5295 || ix86_function_type_abi (*node) != MS_ABI)
5296 warning (OPT_Wattributes, "%qE attribute ignored",
5297 name);
5298 *no_add_attrs = true;
5299 return NULL_TREE;
5302 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5303 if (is_attribute_p ("fastcall", name))
5305 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5307 error ("fastcall and cdecl attributes are not compatible");
5309 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5311 error ("fastcall and stdcall attributes are not compatible");
5313 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5315 error ("fastcall and regparm attributes are not compatible");
5317 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5319 error ("fastcall and thiscall attributes are not compatible");
5323 /* Can combine stdcall with fastcall (redundant), regparm and
5324 sseregparm. */
5325 else if (is_attribute_p ("stdcall", name))
5327 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5329 error ("stdcall and cdecl attributes are not compatible");
5331 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5333 error ("stdcall and fastcall attributes are not compatible");
5335 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5337 error ("stdcall and thiscall attributes are not compatible");
5341 /* Can combine cdecl with regparm and sseregparm. */
5342 else if (is_attribute_p ("cdecl", name))
5344 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5346 error ("stdcall and cdecl attributes are not compatible");
5348 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5350 error ("fastcall and cdecl attributes are not compatible");
5352 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5354 error ("cdecl and thiscall attributes are not compatible");
5357 else if (is_attribute_p ("thiscall", name))
5359 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5360 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5361 name);
5362 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5364 error ("stdcall and thiscall attributes are not compatible");
5366 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5368 error ("fastcall and thiscall attributes are not compatible");
5370 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5372 error ("cdecl and thiscall attributes are not compatible");
5376 /* Can combine sseregparm with all attributes. */
5378 return NULL_TREE;
5381 /* This function determines from TYPE the calling-convention. */
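/* Hedged usage sketch (hypothetical caller, not copied from this file):

     unsigned int cvt = ix86_get_callcvt (fntype);
     if ((cvt & IX86_CALLCVT_FASTCALL) != 0)
       nregs = 2;                            e.g. args in %ecx and %edx
     else if ((cvt & IX86_CALLCVT_STDCALL) != 0)
       callee_pops = true;

   Note that on 64-bit targets the function always reports IX86_CALLCVT_CDECL,
   and with -mrtd a non-stdarg function without an explicit convention is
   treated as stdcall.  The names nregs and callee_pops above are invented
   for the example.  */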
5383 unsigned int
5384 ix86_get_callcvt (const_tree type)
5386 unsigned int ret = 0;
5387 bool is_stdarg;
5388 tree attrs;
5390 if (TARGET_64BIT)
5391 return IX86_CALLCVT_CDECL;
5393 attrs = TYPE_ATTRIBUTES (type);
5394 if (attrs != NULL_TREE)
5396 if (lookup_attribute ("cdecl", attrs))
5397 ret |= IX86_CALLCVT_CDECL;
5398 else if (lookup_attribute ("stdcall", attrs))
5399 ret |= IX86_CALLCVT_STDCALL;
5400 else if (lookup_attribute ("fastcall", attrs))
5401 ret |= IX86_CALLCVT_FASTCALL;
5402 else if (lookup_attribute ("thiscall", attrs))
5403 ret |= IX86_CALLCVT_THISCALL;
5405 /* Regparm isn't allowed for thiscall and fastcall. */
5406 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5408 if (lookup_attribute ("regparm", attrs))
5409 ret |= IX86_CALLCVT_REGPARM;
5410 if (lookup_attribute ("sseregparm", attrs))
5411 ret |= IX86_CALLCVT_SSEREGPARM;
5414 if (IX86_BASE_CALLCVT(ret) != 0)
5415 return ret;
5418 is_stdarg = stdarg_p (type);
5419 if (TARGET_RTD && !is_stdarg)
5420 return IX86_CALLCVT_STDCALL | ret;
5422 if (ret != 0
5423 || is_stdarg
5424 || TREE_CODE (type) != METHOD_TYPE
5425 || ix86_function_type_abi (type) != MS_ABI)
5426 return IX86_CALLCVT_CDECL | ret;
5428 return IX86_CALLCVT_THISCALL;
5431 /* Return 0 if the attributes for two types are incompatible, 1 if they
5432 are compatible, and 2 if they are nearly compatible (which causes a
5433 warning to be generated). */
5435 static int
5436 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5438 unsigned int ccvt1, ccvt2;
5440 if (TREE_CODE (type1) != FUNCTION_TYPE
5441 && TREE_CODE (type1) != METHOD_TYPE)
5442 return 1;
5444 ccvt1 = ix86_get_callcvt (type1);
5445 ccvt2 = ix86_get_callcvt (type2);
5446 if (ccvt1 != ccvt2)
5447 return 0;
5448 if (ix86_function_regparm (type1, NULL)
5449 != ix86_function_regparm (type2, NULL))
5450 return 0;
5452 return 1;
5455 /* Return the regparm value for a function with the indicated TYPE and DECL.
5456 DECL may be NULL when calling function indirectly
5457 or considering a libcall. */
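/* Illustrative example (not part of GCC): for 32-bit code,

     int __attribute__ ((regparm (3))) add3 (int a, int b, int c);

   makes this function return 3, so a, b and c travel in %eax, %edx and %ecx.
   Without an attribute the result is ix86_regparm (normally 0, raised by
   -mregparm=N), while fastcall and thiscall force 2 and 1 below.  */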
5459 static int
5460 ix86_function_regparm (const_tree type, const_tree decl)
5462 tree attr;
5463 int regparm;
5464 unsigned int ccvt;
5466 if (TARGET_64BIT)
5467 return (ix86_function_type_abi (type) == SYSV_ABI
5468 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5469 ccvt = ix86_get_callcvt (type);
5470 regparm = ix86_regparm;
5472 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5474 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5475 if (attr)
5477 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5478 return regparm;
5481 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5482 return 2;
5483 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5484 return 1;
5486 /* Use register calling convention for local functions when possible. */
5487 if (decl
5488 && TREE_CODE (decl) == FUNCTION_DECL
5489 && optimize
5490 && !(profile_flag && !flag_fentry))
5492 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5493 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5494 if (i && i->local && i->can_change_signature)
5496 int local_regparm, globals = 0, regno;
5498 /* Make sure no regparm register is taken by a
5499 fixed register variable. */
5500 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5501 if (fixed_regs[local_regparm])
5502 break;
5504 /* We don't want to use regparm(3) for nested functions as
5505 these use a static chain pointer in the third argument. */
5506 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5507 local_regparm = 2;
5509 /* In 32-bit mode save a register for the split stack. */
5510 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5511 local_regparm = 2;
5513 /* Each fixed register usage increases register pressure,
5514 so fewer registers should be used for argument passing.
5515 This functionality can be overridden by an explicit
5516 regparm value. */
5517 for (regno = 0; regno <= DI_REG; regno++)
5518 if (fixed_regs[regno])
5519 globals++;
5521 local_regparm
5522 = globals < local_regparm ? local_regparm - globals : 0;
5524 if (local_regparm > regparm)
5525 regparm = local_regparm;
5529 return regparm;
5532 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5533 DFmode (2) arguments in SSE registers for a function with the
5534 indicated TYPE and DECL. DECL may be NULL when calling function
5535 indirectly or considering a libcall. Otherwise return 0. */
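/* Illustrative example (not part of GCC): in 32-bit code,

     double __attribute__ ((sseregparm)) scale (float x, double y);

   asks for x and y to be passed in SSE registers, so this helper returns 2
   when SSE is enabled (and errors below when it is not).  Local functions
   compiled with -mfpmath=sse may get the same treatment automatically.  */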
5537 static int
5538 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5540 gcc_assert (!TARGET_64BIT);
5542 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5543 by the sseregparm attribute. */
5544 if (TARGET_SSEREGPARM
5545 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5547 if (!TARGET_SSE)
5549 if (warn)
5551 if (decl)
5552 error ("calling %qD with attribute sseregparm without "
5553 "SSE/SSE2 enabled", decl);
5554 else
5555 error ("calling %qT with attribute sseregparm without "
5556 "SSE/SSE2 enabled", type);
5558 return 0;
5561 return 2;
5564 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5565 (and DFmode for SSE2) arguments in SSE registers. */
5566 if (decl && TARGET_SSE_MATH && optimize
5567 && !(profile_flag && !flag_fentry))
5569 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5570 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5571 if (i && i->local && i->can_change_signature)
5572 return TARGET_SSE2 ? 2 : 1;
5575 return 0;
5578 /* Return true if EAX is live at the start of the function. Used by
5579 ix86_expand_prologue to determine if we need special help before
5580 calling allocate_stack_worker. */
5582 static bool
5583 ix86_eax_live_at_start_p (void)
5585 /* Cheat. Don't bother working forward from ix86_function_regparm
5586 to the function type to whether an actual argument is located in
5587 eax. Instead just look at cfg info, which is still close enough
5588 to correct at this point. This gives false positives for broken
5589 functions that might use uninitialized data that happens to be
5590 allocated in eax, but who cares? */
5591 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5594 static bool
5595 ix86_keep_aggregate_return_pointer (tree fntype)
5597 tree attr;
5599 if (!TARGET_64BIT)
5601 attr = lookup_attribute ("callee_pop_aggregate_return",
5602 TYPE_ATTRIBUTES (fntype));
5603 if (attr)
5604 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5606 /* For 32-bit MS-ABI the default is to keep aggregate
5607 return pointer. */
5608 if (ix86_function_type_abi (fntype) == MS_ABI)
5609 return true;
5611 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5614 /* Value is the number of bytes of arguments automatically
5615 popped when returning from a subroutine call.
5616 FUNDECL is the declaration node of the function (as a tree),
5617 FUNTYPE is the data type of the function (as a tree),
5618 or for a library call it is an identifier node for the subroutine name.
5619 SIZE is the number of bytes of arguments passed on the stack.
5621 On the 80386, the RTD insn may be used to pop them if the number
5622 of args is fixed, but if the number is variable then the caller
5623 must pop them all. RTD can't be used for library calls now
5624 because the library is compiled with the Unix compiler.
5625 Use of RTD is a selectable option, since it is incompatible with
5626 standard Unix calling sequences. If the option is not selected,
5627 the caller must always pop the args.
5629 The attribute stdcall is equivalent to RTD on a per module basis. */
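/* Worked example, for orientation only: a 32-bit stdcall function

     void __attribute__ ((stdcall)) f (int a, int b);    8 bytes of stack args

   makes this hook return 8, so the callee returns with "ret $8" and the
   caller does not adjust %esp afterwards.  A variadic declaration or plain
   cdecl returns 0 and the caller pops the arguments instead.  */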
5631 static int
5632 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5634 unsigned int ccvt;
5636 /* None of the 64-bit ABIs pop arguments. */
5637 if (TARGET_64BIT)
5638 return 0;
5640 ccvt = ix86_get_callcvt (funtype);
5642 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5643 | IX86_CALLCVT_THISCALL)) != 0
5644 && ! stdarg_p (funtype))
5645 return size;
5647 /* Lose any fake structure return argument if it is passed on the stack. */
5648 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5649 && !ix86_keep_aggregate_return_pointer (funtype))
5651 int nregs = ix86_function_regparm (funtype, fundecl);
5652 if (nregs == 0)
5653 return GET_MODE_SIZE (Pmode);
5656 return 0;
5659 /* Argument support functions. */
5661 /* Return true when register may be used to pass function parameters. */
5662 bool
5663 ix86_function_arg_regno_p (int regno)
5665 int i;
5666 const int *parm_regs;
5668 if (!TARGET_64BIT)
5670 if (TARGET_MACHO)
5671 return (regno < REGPARM_MAX
5672 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5673 else
5674 return (regno < REGPARM_MAX
5675 || (TARGET_MMX && MMX_REGNO_P (regno)
5676 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5677 || (TARGET_SSE && SSE_REGNO_P (regno)
5678 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5681 if (TARGET_MACHO)
5683 if (SSE_REGNO_P (regno) && TARGET_SSE)
5684 return true;
5686 else
5688 if (TARGET_SSE && SSE_REGNO_P (regno)
5689 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5690 return true;
5693 /* TODO: The function should depend on the current function's ABI, but
5694 builtins.c would need updating then. Therefore we use the
5695 default ABI. */
5697 /* RAX is used as hidden argument to va_arg functions. */
5698 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5699 return true;
5701 if (ix86_abi == MS_ABI)
5702 parm_regs = x86_64_ms_abi_int_parameter_registers;
5703 else
5704 parm_regs = x86_64_int_parameter_registers;
5705 for (i = 0; i < (ix86_abi == MS_ABI
5706 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5707 if (regno == parm_regs[i])
5708 return true;
5709 return false;
5712 /* Return if we do not know how to pass TYPE solely in registers. */
5714 static bool
5715 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5717 if (must_pass_in_stack_var_size_or_pad (mode, type))
5718 return true;
5720 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5721 The layout_type routine is crafty and tries to trick us into passing
5722 currently unsupported vector types on the stack by using TImode. */
5723 return (!TARGET_64BIT && mode == TImode
5724 && type && TREE_CODE (type) != VECTOR_TYPE);
5727 /* Return the size, in bytes, of the area reserved for arguments passed
5728 in registers for the function represented by FNDECL, depending on the
5729 ABI used. */
5730 int
5731 ix86_reg_parm_stack_space (const_tree fndecl)
5733 enum calling_abi call_abi = SYSV_ABI;
5734 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5735 call_abi = ix86_function_abi (fndecl);
5736 else
5737 call_abi = ix86_function_type_abi (fndecl);
5738 if (TARGET_64BIT && call_abi == MS_ABI)
5739 return 32;
5740 return 0;
5743 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5744 calling ABI used. */
5745 enum calling_abi
5746 ix86_function_type_abi (const_tree fntype)
5748 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5750 enum calling_abi abi = ix86_abi;
5751 if (abi == SYSV_ABI)
5753 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5754 abi = MS_ABI;
5756 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5757 abi = SYSV_ABI;
5758 return abi;
5760 return ix86_abi;
5763 static bool
5764 ix86_function_ms_hook_prologue (const_tree fn)
5766 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5768 if (decl_function_context (fn) != NULL_TREE)
5769 error_at (DECL_SOURCE_LOCATION (fn),
5770 "ms_hook_prologue is not compatible with nested function");
5771 else
5772 return true;
5774 return false;
5777 static enum calling_abi
5778 ix86_function_abi (const_tree fndecl)
5780 if (! fndecl)
5781 return ix86_abi;
5782 return ix86_function_type_abi (TREE_TYPE (fndecl));
5785 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
5786 calling ABI used. */
5787 enum calling_abi
5788 ix86_cfun_abi (void)
5790 if (! cfun)
5791 return ix86_abi;
5792 return cfun->machine->call_abi;
5795 /* Write the extra assembler code needed to declare a function properly. */
5797 void
5798 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5799 tree decl)
5801 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5803 if (is_ms_hook)
5805 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5806 unsigned int filler_cc = 0xcccccccc;
5808 for (i = 0; i < filler_count; i += 4)
5809 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5812 #ifdef SUBTARGET_ASM_UNWIND_INIT
5813 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5814 #endif
5816 ASM_OUTPUT_LABEL (asm_out_file, fname);
5818 /* Output magic byte marker, if hot-patch attribute is set. */
5819 if (is_ms_hook)
5821 if (TARGET_64BIT)
5823 /* leaq [%rsp + 0], %rsp */
5824 asm_fprintf (asm_out_file, ASM_BYTE
5825 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5827 else
5829 /* movl.s %edi, %edi
5830 push %ebp
5831 movl.s %esp, %ebp */
5832 asm_fprintf (asm_out_file, ASM_BYTE
5833 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5838 /* regclass.c */
5839 extern void init_regs (void);
5841 /* Implementation of the call ABI switching target hook. Set the
5842 call register sets appropriate for FNDECL. See also
5843 ix86_conditional_register_usage for more details. */
5844 void
5845 ix86_call_abi_override (const_tree fndecl)
5847 if (fndecl == NULL_TREE)
5848 cfun->machine->call_abi = ix86_abi;
5849 else
5850 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5853 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
5854 Avoid expensive re-initialization of init_regs each time we switch function
5855 context since this is needed only during RTL expansion. */
5856 static void
5857 ix86_maybe_switch_abi (void)
5859 if (TARGET_64BIT &&
5860 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5861 reinit_regs ();
5864 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5865 for a call to a function whose data type is FNTYPE.
5866 For a library call, FNTYPE is 0. */
5868 void
5869 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5870 tree fntype, /* tree ptr for function decl */
5871 rtx libname, /* SYMBOL_REF of library name or 0 */
5872 tree fndecl,
5873 int caller)
5875 struct cgraph_local_info *i;
5876 tree fnret_type;
5878 memset (cum, 0, sizeof (*cum));
5880 /* Initialize for the current callee. */
5881 if (caller)
5883 cfun->machine->callee_pass_avx256_p = false;
5884 cfun->machine->callee_return_avx256_p = false;
5887 if (fndecl)
5889 i = cgraph_local_info (fndecl);
5890 cum->call_abi = ix86_function_abi (fndecl);
5891 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5893 else
5895 i = NULL;
5896 cum->call_abi = ix86_function_type_abi (fntype);
5897 if (fntype)
5898 fnret_type = TREE_TYPE (fntype);
5899 else
5900 fnret_type = NULL;
5903 if (TARGET_VZEROUPPER && fnret_type)
5905 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5906 false);
5907 if (function_pass_avx256_p (fnret_value))
5909 /* The return value of this function uses 256bit AVX modes. */
5910 if (caller)
5911 cfun->machine->callee_return_avx256_p = true;
5912 else
5913 cfun->machine->caller_return_avx256_p = true;
5917 cum->caller = caller;
5919 /* Set up the number of registers to use for passing arguments. */
5921 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5922 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5923 "or subtarget optimization implying it");
5924 cum->nregs = ix86_regparm;
5925 if (TARGET_64BIT)
5927 cum->nregs = (cum->call_abi == SYSV_ABI
5928 ? X86_64_REGPARM_MAX
5929 : X86_64_MS_REGPARM_MAX);
5931 if (TARGET_SSE)
5933 cum->sse_nregs = SSE_REGPARM_MAX;
5934 if (TARGET_64BIT)
5936 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5937 ? X86_64_SSE_REGPARM_MAX
5938 : X86_64_MS_SSE_REGPARM_MAX);
5941 if (TARGET_MMX)
5942 cum->mmx_nregs = MMX_REGPARM_MAX;
5943 cum->warn_avx = true;
5944 cum->warn_sse = true;
5945 cum->warn_mmx = true;
5947 /* Because the type might mismatch between caller and callee, we need to
5948 use the actual type of the function for local calls.
5949 FIXME: cgraph_analyze can be told to actually record if the function uses
5950 va_start so that for local functions maybe_vaarg can be made aggressive,
5951 helping K&R code.
5952 FIXME: once the type system is fixed, we won't need this code anymore. */
5953 if (i && i->local && i->can_change_signature)
5954 fntype = TREE_TYPE (fndecl);
5955 cum->maybe_vaarg = (fntype
5956 ? (!prototype_p (fntype) || stdarg_p (fntype))
5957 : !libname);
5959 if (!TARGET_64BIT)
5961 /* If there are variable arguments, then we won't pass anything
5962 in registers in 32-bit mode. */
5963 if (stdarg_p (fntype))
5965 cum->nregs = 0;
5966 cum->sse_nregs = 0;
5967 cum->mmx_nregs = 0;
5968 cum->warn_avx = 0;
5969 cum->warn_sse = 0;
5970 cum->warn_mmx = 0;
5971 return;
5974 /* Use ecx and edx registers if function has fastcall attribute,
5975 else look for regparm information. */
5976 if (fntype)
5978 unsigned int ccvt = ix86_get_callcvt (fntype);
5979 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5981 cum->nregs = 1;
5982 cum->fastcall = 1; /* Same first register as in fastcall. */
5984 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5986 cum->nregs = 2;
5987 cum->fastcall = 1;
5989 else
5990 cum->nregs = ix86_function_regparm (fntype, fndecl);
5993 /* Set up the number of SSE registers used for passing SFmode
5994 and DFmode arguments. Warn for mismatching ABI. */
5995 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5999 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6000 But in the case of vector types, it is some vector mode.
6002 When we have only some of our vector isa extensions enabled, then there
6003 are some modes for which vector_mode_supported_p is false. For these
6004 modes, the generic vector support in gcc will choose some non-vector mode
6005 in order to implement the type. By computing the natural mode, we'll
6006 select the proper ABI location for the operand and not depend on whatever
6007 the middle-end decides to do with these vector types.
6009 The middle-end can't deal with vector types larger than 16 bytes. In this
6010 case, we return the original mode and warn about the ABI change if CUM isn't
6011 NULL. */
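/* Small example of what "natural mode" means here (sketch, not GCC text):

     typedef int v4si __attribute__ ((vector_size (16)));

   has natural mode V4SImode even when SSE is disabled and the middle-end
   would otherwise fall back to a non-vector mode, so the ABI location is
   chosen consistently.  A 32-byte vector without AVX keeps TYPE_MODE and
   triggers the "changes the ABI" warning below.  */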
6013 static enum machine_mode
6014 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
6016 enum machine_mode mode = TYPE_MODE (type);
6018 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6020 HOST_WIDE_INT size = int_size_in_bytes (type);
6021 if ((size == 8 || size == 16 || size == 32)
6022 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6023 && TYPE_VECTOR_SUBPARTS (type) > 1)
6025 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6027 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6028 mode = MIN_MODE_VECTOR_FLOAT;
6029 else
6030 mode = MIN_MODE_VECTOR_INT;
6032 /* Get the mode which has this inner mode and number of units. */
6033 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6034 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6035 && GET_MODE_INNER (mode) == innermode)
6037 if (size == 32 && !TARGET_AVX)
6039 static bool warnedavx;
6041 if (cum
6042 && !warnedavx
6043 && cum->warn_avx)
6045 warnedavx = true;
6046 warning (0, "AVX vector argument without AVX "
6047 "enabled changes the ABI");
6049 return TYPE_MODE (type);
6051 else
6052 return mode;
6055 gcc_unreachable ();
6059 return mode;
6062 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6063 this may not agree with the mode that the type system has chosen for the
6064 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6065 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6067 static rtx
6068 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6069 unsigned int regno)
6071 rtx tmp;
6073 if (orig_mode != BLKmode)
6074 tmp = gen_rtx_REG (orig_mode, regno);
6075 else
6077 tmp = gen_rtx_REG (mode, regno);
6078 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6079 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6082 return tmp;
6085 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6086 of this code is to classify each 8bytes of incoming argument by the register
6087 class and assign registers accordingly. */
6089 /* Return the union class of CLASS1 and CLASS2.
6090 See the x86-64 PS ABI for details. */
6092 static enum x86_64_reg_class
6093 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6095 /* Rule #1: If both classes are equal, this is the resulting class. */
6096 if (class1 == class2)
6097 return class1;
6099 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6100 the other class. */
6101 if (class1 == X86_64_NO_CLASS)
6102 return class2;
6103 if (class2 == X86_64_NO_CLASS)
6104 return class1;
6106 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6107 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6108 return X86_64_MEMORY_CLASS;
6110 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6111 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6112 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6113 return X86_64_INTEGERSI_CLASS;
6114 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6115 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6116 return X86_64_INTEGER_CLASS;
6118 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6119 MEMORY is used. */
6120 if (class1 == X86_64_X87_CLASS
6121 || class1 == X86_64_X87UP_CLASS
6122 || class1 == X86_64_COMPLEX_X87_CLASS
6123 || class2 == X86_64_X87_CLASS
6124 || class2 == X86_64_X87UP_CLASS
6125 || class2 == X86_64_COMPLEX_X87_CLASS)
6126 return X86_64_MEMORY_CLASS;
6128 /* Rule #6: Otherwise class SSE is used. */
6129 return X86_64_SSE_CLASS;
6132 /* Classify the argument of type TYPE and mode MODE.
6133 CLASSES will be filled by the register class used to pass each word
6134 of the operand. The number of words is returned. In case the parameter
6135 should be passed in memory, 0 is returned. As a special case for zero
6136 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6138 BIT_OFFSET is used internally for handling records and specifies the
6139 offset, in bits modulo 256, of the current chunk to avoid overflow cases.
6141 See the x86-64 PS ABI for details. */
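/* Rough example of the classification performed below (illustrative only):

     struct s { double d; long l; };        two eightbytes:
       classes[0] = X86_64_SSEDF_CLASS      -> passed in an SSE register
       classes[1] = X86_64_INTEGER_CLASS    -> passed in a GPR

     struct t { char c[40]; };              larger than 32 bytes -> returns 0,
                                            i.e. passed in memory

   The finer subclasses (SSESF/SSEDF/INTEGERSI) only refine register choice;
   see the SysV x86-64 psABI for the authoritative rules.  */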
6144 static int
6145 classify_argument (enum machine_mode mode, const_tree type,
6146 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6148 HOST_WIDE_INT bytes =
6149 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6150 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6152 /* Variable sized entities are always passed/returned in memory. */
6153 if (bytes < 0)
6154 return 0;
6156 if (mode != VOIDmode
6157 && targetm.calls.must_pass_in_stack (mode, type))
6158 return 0;
6160 if (type && AGGREGATE_TYPE_P (type))
6162 int i;
6163 tree field;
6164 enum x86_64_reg_class subclasses[MAX_CLASSES];
6166 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6167 if (bytes > 32)
6168 return 0;
6170 for (i = 0; i < words; i++)
6171 classes[i] = X86_64_NO_CLASS;
6173 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6174 signal the memory class, so handle it as a special case. */
6175 if (!words)
6177 classes[0] = X86_64_NO_CLASS;
6178 return 1;
6181 /* Classify each field of record and merge classes. */
6182 switch (TREE_CODE (type))
6184 case RECORD_TYPE:
6185 /* And now merge the fields of structure. */
6186 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6188 if (TREE_CODE (field) == FIELD_DECL)
6190 int num;
6192 if (TREE_TYPE (field) == error_mark_node)
6193 continue;
6195 /* Bitfields are always classified as integer. Handle them
6196 early, since later code would consider them to be
6197 misaligned integers. */
6198 if (DECL_BIT_FIELD (field))
6200 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6201 i < ((int_bit_position (field) + (bit_offset % 64))
6202 + tree_low_cst (DECL_SIZE (field), 0)
6203 + 63) / 8 / 8; i++)
6204 classes[i] =
6205 merge_classes (X86_64_INTEGER_CLASS,
6206 classes[i]);
6208 else
6210 int pos;
6212 type = TREE_TYPE (field);
6214 /* Flexible array member is ignored. */
6215 if (TYPE_MODE (type) == BLKmode
6216 && TREE_CODE (type) == ARRAY_TYPE
6217 && TYPE_SIZE (type) == NULL_TREE
6218 && TYPE_DOMAIN (type) != NULL_TREE
6219 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6220 == NULL_TREE))
6222 static bool warned;
6224 if (!warned && warn_psabi)
6226 warned = true;
6227 inform (input_location,
6228 "the ABI of passing struct with"
6229 " a flexible array member has"
6230 " changed in GCC 4.4");
6232 continue;
6234 num = classify_argument (TYPE_MODE (type), type,
6235 subclasses,
6236 (int_bit_position (field)
6237 + bit_offset) % 256);
6238 if (!num)
6239 return 0;
6240 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6241 for (i = 0; i < num && (i + pos) < words; i++)
6242 classes[i + pos] =
6243 merge_classes (subclasses[i], classes[i + pos]);
6247 break;
6249 case ARRAY_TYPE:
6250 /* Arrays are handled as small records. */
6252 int num;
6253 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6254 TREE_TYPE (type), subclasses, bit_offset);
6255 if (!num)
6256 return 0;
6258 /* The partial classes are now full classes. */
6259 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6260 subclasses[0] = X86_64_SSE_CLASS;
6261 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6262 && !((bit_offset % 64) == 0 && bytes == 4))
6263 subclasses[0] = X86_64_INTEGER_CLASS;
6265 for (i = 0; i < words; i++)
6266 classes[i] = subclasses[i % num];
6268 break;
6270 case UNION_TYPE:
6271 case QUAL_UNION_TYPE:
6272 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
6274 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6276 if (TREE_CODE (field) == FIELD_DECL)
6278 int num;
6280 if (TREE_TYPE (field) == error_mark_node)
6281 continue;
6283 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6284 TREE_TYPE (field), subclasses,
6285 bit_offset);
6286 if (!num)
6287 return 0;
6288 for (i = 0; i < num; i++)
6289 classes[i] = merge_classes (subclasses[i], classes[i]);
6292 break;
6294 default:
6295 gcc_unreachable ();
6298 if (words > 2)
6300 /* When the size is larger than 16 bytes, if the first class isn't
6301 X86_64_SSE_CLASS or any of the others aren't
6302 X86_64_SSEUP_CLASS, everything should be passed in
6303 memory. */
6304 if (classes[0] != X86_64_SSE_CLASS)
6305 return 0;
6307 for (i = 1; i < words; i++)
6308 if (classes[i] != X86_64_SSEUP_CLASS)
6309 return 0;
6312 /* Final merger cleanup. */
6313 for (i = 0; i < words; i++)
6315 /* If one class is MEMORY, everything should be passed in
6316 memory. */
6317 if (classes[i] == X86_64_MEMORY_CLASS)
6318 return 0;
6320 /* The X86_64_SSEUP_CLASS should be always preceded by
6321 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6322 if (classes[i] == X86_64_SSEUP_CLASS
6323 && classes[i - 1] != X86_64_SSE_CLASS
6324 && classes[i - 1] != X86_64_SSEUP_CLASS)
6326 /* The first one should never be X86_64_SSEUP_CLASS. */
6327 gcc_assert (i != 0);
6328 classes[i] = X86_64_SSE_CLASS;
6331 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6332 everything should be passed in memory. */
6333 if (classes[i] == X86_64_X87UP_CLASS
6334 && (classes[i - 1] != X86_64_X87_CLASS))
6336 static bool warned;
6338 /* The first one should never be X86_64_X87UP_CLASS. */
6339 gcc_assert (i != 0);
6340 if (!warned && warn_psabi)
6342 warned = true;
6343 inform (input_location,
6344 "the ABI of passing union with long double"
6345 " has changed in GCC 4.4");
6347 return 0;
6350 return words;
6353 /* Compute the alignment needed. We align all types to natural boundaries,
6354 with the exception of XFmode, which is aligned to 64 bits. */
6355 if (mode != VOIDmode && mode != BLKmode)
6357 int mode_alignment = GET_MODE_BITSIZE (mode);
6359 if (mode == XFmode)
6360 mode_alignment = 128;
6361 else if (mode == XCmode)
6362 mode_alignment = 256;
6363 if (COMPLEX_MODE_P (mode))
6364 mode_alignment /= 2;
6365 /* Misaligned fields are always returned in memory. */
6366 if (bit_offset % mode_alignment)
6367 return 0;
6370 /* For V1xx modes, just use the base mode. */
6371 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6372 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6373 mode = GET_MODE_INNER (mode);
6375 /* Classification of atomic types. */
6376 switch (mode)
6378 case SDmode:
6379 case DDmode:
6380 classes[0] = X86_64_SSE_CLASS;
6381 return 1;
6382 case TDmode:
6383 classes[0] = X86_64_SSE_CLASS;
6384 classes[1] = X86_64_SSEUP_CLASS;
6385 return 2;
6386 case DImode:
6387 case SImode:
6388 case HImode:
6389 case QImode:
6390 case CSImode:
6391 case CHImode:
6392 case CQImode:
6394 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
6396 if (size <= 32)
6398 classes[0] = X86_64_INTEGERSI_CLASS;
6399 return 1;
6401 else if (size <= 64)
6403 classes[0] = X86_64_INTEGER_CLASS;
6404 return 1;
6406 else if (size <= 64+32)
6408 classes[0] = X86_64_INTEGER_CLASS;
6409 classes[1] = X86_64_INTEGERSI_CLASS;
6410 return 2;
6412 else if (size <= 64+64)
6414 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6415 return 2;
6417 else
6418 gcc_unreachable ();
6420 case CDImode:
6421 case TImode:
6422 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6423 return 2;
6424 case COImode:
6425 case OImode:
6426 /* OImode shouldn't be used directly. */
6427 gcc_unreachable ();
6428 case CTImode:
6429 return 0;
6430 case SFmode:
6431 if (!(bit_offset % 64))
6432 classes[0] = X86_64_SSESF_CLASS;
6433 else
6434 classes[0] = X86_64_SSE_CLASS;
6435 return 1;
6436 case DFmode:
6437 classes[0] = X86_64_SSEDF_CLASS;
6438 return 1;
6439 case XFmode:
6440 classes[0] = X86_64_X87_CLASS;
6441 classes[1] = X86_64_X87UP_CLASS;
6442 return 2;
6443 case TFmode:
6444 classes[0] = X86_64_SSE_CLASS;
6445 classes[1] = X86_64_SSEUP_CLASS;
6446 return 2;
6447 case SCmode:
6448 classes[0] = X86_64_SSE_CLASS;
6449 if (!(bit_offset % 64))
6450 return 1;
6451 else
6453 static bool warned;
6455 if (!warned && warn_psabi)
6457 warned = true;
6458 inform (input_location,
6459 "the ABI of passing structure with complex float"
6460 " member has changed in GCC 4.4");
6462 classes[1] = X86_64_SSESF_CLASS;
6463 return 2;
6465 case DCmode:
6466 classes[0] = X86_64_SSEDF_CLASS;
6467 classes[1] = X86_64_SSEDF_CLASS;
6468 return 2;
6469 case XCmode:
6470 classes[0] = X86_64_COMPLEX_X87_CLASS;
6471 return 1;
6472 case TCmode:
6473 /* This mode is larger than 16 bytes. */
6474 return 0;
6475 case V8SFmode:
6476 case V8SImode:
6477 case V32QImode:
6478 case V16HImode:
6479 case V4DFmode:
6480 case V4DImode:
6481 classes[0] = X86_64_SSE_CLASS;
6482 classes[1] = X86_64_SSEUP_CLASS;
6483 classes[2] = X86_64_SSEUP_CLASS;
6484 classes[3] = X86_64_SSEUP_CLASS;
6485 return 4;
6486 case V4SFmode:
6487 case V4SImode:
6488 case V16QImode:
6489 case V8HImode:
6490 case V2DFmode:
6491 case V2DImode:
6492 classes[0] = X86_64_SSE_CLASS;
6493 classes[1] = X86_64_SSEUP_CLASS;
6494 return 2;
6495 case V1TImode:
6496 case V1DImode:
6497 case V2SFmode:
6498 case V2SImode:
6499 case V4HImode:
6500 case V8QImode:
6501 classes[0] = X86_64_SSE_CLASS;
6502 return 1;
6503 case BLKmode:
6504 case VOIDmode:
6505 return 0;
6506 default:
6507 gcc_assert (VECTOR_MODE_P (mode));
6509 if (bytes > 16)
6510 return 0;
6512 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6514 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6515 classes[0] = X86_64_INTEGERSI_CLASS;
6516 else
6517 classes[0] = X86_64_INTEGER_CLASS;
6518 classes[1] = X86_64_INTEGER_CLASS;
6519 return 1 + (bytes > 8);
6523 /* Examine the argument and return set number of register required in each
6524 class. Return 0 iff parameter should be passed in memory. */
6525 static int
6526 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6527 int *int_nregs, int *sse_nregs)
6529 enum x86_64_reg_class regclass[MAX_CLASSES];
6530 int n = classify_argument (mode, type, regclass, 0);
6532 *int_nregs = 0;
6533 *sse_nregs = 0;
6534 if (!n)
6535 return 0;
6536 for (n--; n >= 0; n--)
6537 switch (regclass[n])
6539 case X86_64_INTEGER_CLASS:
6540 case X86_64_INTEGERSI_CLASS:
6541 (*int_nregs)++;
6542 break;
6543 case X86_64_SSE_CLASS:
6544 case X86_64_SSESF_CLASS:
6545 case X86_64_SSEDF_CLASS:
6546 (*sse_nregs)++;
6547 break;
6548 case X86_64_NO_CLASS:
6549 case X86_64_SSEUP_CLASS:
6550 break;
6551 case X86_64_X87_CLASS:
6552 case X86_64_X87UP_CLASS:
6553 if (!in_return)
6554 return 0;
6555 break;
6556 case X86_64_COMPLEX_X87_CLASS:
6557 return in_return ? 2 : 0;
6558 case X86_64_MEMORY_CLASS:
6559 gcc_unreachable ();
6561 return 1;
6564 /* Construct container for the argument used by GCC interface. See
6565 FUNCTION_ARG for the detailed description. */
6567 static rtx
6568 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6569 const_tree type, int in_return, int nintregs, int nsseregs,
6570 const int *intreg, int sse_regno)
6572 /* The following variables hold the static issued_error state. */
6573 static bool issued_sse_arg_error;
6574 static bool issued_sse_ret_error;
6575 static bool issued_x87_ret_error;
6577 enum machine_mode tmpmode;
6578 int bytes =
6579 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6580 enum x86_64_reg_class regclass[MAX_CLASSES];
6581 int n;
6582 int i;
6583 int nexps = 0;
6584 int needed_sseregs, needed_intregs;
6585 rtx exp[MAX_CLASSES];
6586 rtx ret;
6588 n = classify_argument (mode, type, regclass, 0);
6589 if (!n)
6590 return NULL;
6591 if (!examine_argument (mode, type, in_return, &needed_intregs,
6592 &needed_sseregs))
6593 return NULL;
6594 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6595 return NULL;
6597 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6598 some less clueful developer tries to use floating-point anyway. */
6599 if (needed_sseregs && !TARGET_SSE)
6601 if (in_return)
6603 if (!issued_sse_ret_error)
6605 error ("SSE register return with SSE disabled");
6606 issued_sse_ret_error = true;
6609 else if (!issued_sse_arg_error)
6611 error ("SSE register argument with SSE disabled");
6612 issued_sse_arg_error = true;
6614 return NULL;
6617 /* Likewise, error if the ABI requires us to return values in the
6618 x87 registers and the user specified -mno-80387. */
6619 if (!TARGET_80387 && in_return)
6620 for (i = 0; i < n; i++)
6621 if (regclass[i] == X86_64_X87_CLASS
6622 || regclass[i] == X86_64_X87UP_CLASS
6623 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6625 if (!issued_x87_ret_error)
6627 error ("x87 register return with x87 disabled");
6628 issued_x87_ret_error = true;
6630 return NULL;
6633 /* First construct simple cases. Avoid SCmode, since we want to use
6634 single register to pass this type. */
6635 if (n == 1 && mode != SCmode)
6636 switch (regclass[0])
6638 case X86_64_INTEGER_CLASS:
6639 case X86_64_INTEGERSI_CLASS:
6640 return gen_rtx_REG (mode, intreg[0]);
6641 case X86_64_SSE_CLASS:
6642 case X86_64_SSESF_CLASS:
6643 case X86_64_SSEDF_CLASS:
6644 if (mode != BLKmode)
6645 return gen_reg_or_parallel (mode, orig_mode,
6646 SSE_REGNO (sse_regno));
6647 break;
6648 case X86_64_X87_CLASS:
6649 case X86_64_COMPLEX_X87_CLASS:
6650 return gen_rtx_REG (mode, FIRST_STACK_REG);
6651 case X86_64_NO_CLASS:
6652 /* Zero sized array, struct or class. */
6653 return NULL;
6654 default:
6655 gcc_unreachable ();
6657 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6658 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6659 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6660 if (n == 4
6661 && regclass[0] == X86_64_SSE_CLASS
6662 && regclass[1] == X86_64_SSEUP_CLASS
6663 && regclass[2] == X86_64_SSEUP_CLASS
6664 && regclass[3] == X86_64_SSEUP_CLASS
6665 && mode != BLKmode)
6666 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6668 if (n == 2
6669 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6670 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6671 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6672 && regclass[1] == X86_64_INTEGER_CLASS
6673 && (mode == CDImode || mode == TImode || mode == TFmode)
6674 && intreg[0] + 1 == intreg[1])
6675 return gen_rtx_REG (mode, intreg[0]);
6677 /* Otherwise figure out the entries of the PARALLEL. */
6678 for (i = 0; i < n; i++)
6680 int pos;
6682 switch (regclass[i])
6684 case X86_64_NO_CLASS:
6685 break;
6686 case X86_64_INTEGER_CLASS:
6687 case X86_64_INTEGERSI_CLASS:
6688 /* Merge TImodes on aligned occasions here too. */
6689 if (i * 8 + 8 > bytes)
6690 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6691 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6692 tmpmode = SImode;
6693 else
6694 tmpmode = DImode;
6695 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
6696 if (tmpmode == BLKmode)
6697 tmpmode = DImode;
6698 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6699 gen_rtx_REG (tmpmode, *intreg),
6700 GEN_INT (i*8));
6701 intreg++;
6702 break;
6703 case X86_64_SSESF_CLASS:
6704 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6705 gen_rtx_REG (SFmode,
6706 SSE_REGNO (sse_regno)),
6707 GEN_INT (i*8));
6708 sse_regno++;
6709 break;
6710 case X86_64_SSEDF_CLASS:
6711 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6712 gen_rtx_REG (DFmode,
6713 SSE_REGNO (sse_regno)),
6714 GEN_INT (i*8));
6715 sse_regno++;
6716 break;
6717 case X86_64_SSE_CLASS:
6718 pos = i;
6719 switch (n)
6721 case 1:
6722 tmpmode = DImode;
6723 break;
6724 case 2:
6725 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6727 tmpmode = TImode;
6728 i++;
6730 else
6731 tmpmode = DImode;
6732 break;
6733 case 4:
6734 gcc_assert (i == 0
6735 && regclass[1] == X86_64_SSEUP_CLASS
6736 && regclass[2] == X86_64_SSEUP_CLASS
6737 && regclass[3] == X86_64_SSEUP_CLASS);
6738 tmpmode = OImode;
6739 i += 3;
6740 break;
6741 default:
6742 gcc_unreachable ();
6744 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6745 gen_rtx_REG (tmpmode,
6746 SSE_REGNO (sse_regno)),
6747 GEN_INT (pos*8));
6748 sse_regno++;
6749 break;
6750 default:
6751 gcc_unreachable ();
6755 /* Empty aligned struct, union or class. */
6756 if (nexps == 0)
6757 return NULL;
6759 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6760 for (i = 0; i < nexps; i++)
6761 XVECEXP (ret, 0, i) = exp [i];
6762 return ret;
6765 /* Update the data in CUM to advance over an argument of mode MODE
6766 and data type TYPE. (TYPE is null for libcalls where that information
6767 may not be available.) */
6769 static void
6770 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6771 const_tree type, HOST_WIDE_INT bytes,
6772 HOST_WIDE_INT words)
6774 switch (mode)
6776 default:
6777 break;
6779 case BLKmode:
6780 if (bytes < 0)
6781 break;
6782 /* FALLTHRU */
6784 case DImode:
6785 case SImode:
6786 case HImode:
6787 case QImode:
6788 cum->words += words;
6789 cum->nregs -= words;
6790 cum->regno += words;
6792 if (cum->nregs <= 0)
6794 cum->nregs = 0;
6795 cum->regno = 0;
6797 break;
6799 case OImode:
6800 /* OImode shouldn't be used directly. */
6801 gcc_unreachable ();
6803 case DFmode:
6804 if (cum->float_in_sse < 2)
6805 break;
6806 case SFmode:
6807 if (cum->float_in_sse < 1)
6808 break;
6809 /* FALLTHRU */
6811 case V8SFmode:
6812 case V8SImode:
6813 case V32QImode:
6814 case V16HImode:
6815 case V4DFmode:
6816 case V4DImode:
6817 case TImode:
6818 case V16QImode:
6819 case V8HImode:
6820 case V4SImode:
6821 case V2DImode:
6822 case V4SFmode:
6823 case V2DFmode:
6824 if (!type || !AGGREGATE_TYPE_P (type))
6826 cum->sse_words += words;
6827 cum->sse_nregs -= 1;
6828 cum->sse_regno += 1;
6829 if (cum->sse_nregs <= 0)
6831 cum->sse_nregs = 0;
6832 cum->sse_regno = 0;
6835 break;
6837 case V8QImode:
6838 case V4HImode:
6839 case V2SImode:
6840 case V2SFmode:
6841 case V1TImode:
6842 case V1DImode:
6843 if (!type || !AGGREGATE_TYPE_P (type))
6845 cum->mmx_words += words;
6846 cum->mmx_nregs -= 1;
6847 cum->mmx_regno += 1;
6848 if (cum->mmx_nregs <= 0)
6850 cum->mmx_nregs = 0;
6851 cum->mmx_regno = 0;
6854 break;
6858 static void
6859 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6860 const_tree type, HOST_WIDE_INT words, bool named)
6862 int int_nregs, sse_nregs;
6864 /* Unnamed 256bit vector mode parameters are passed on stack. */
6865 if (!named && VALID_AVX256_REG_MODE (mode))
6866 return;
6868 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6869 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6871 cum->nregs -= int_nregs;
6872 cum->sse_nregs -= sse_nregs;
6873 cum->regno += int_nregs;
6874 cum->sse_regno += sse_nregs;
6876 else
6878 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6879 cum->words = (cum->words + align - 1) & ~(align - 1);
6880 cum->words += words;
6884 static void
6885 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6886 HOST_WIDE_INT words)
6888 /* Otherwise, this should be passed indirectly. */
6889 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6891 cum->words += words;
6892 if (cum->nregs > 0)
6894 cum->nregs -= 1;
6895 cum->regno += 1;
6899 /* Update the data in CUM to advance over an argument of mode MODE and
6900 data type TYPE. (TYPE is null for libcalls where that information
6901 may not be available.) */
6903 static void
6904 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6905 const_tree type, bool named)
6907 HOST_WIDE_INT bytes, words;
6909 if (mode == BLKmode)
6910 bytes = int_size_in_bytes (type);
6911 else
6912 bytes = GET_MODE_SIZE (mode);
6913 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6915 if (type)
6916 mode = type_natural_mode (type, NULL);
6918 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6919 function_arg_advance_ms_64 (cum, bytes, words);
6920 else if (TARGET_64BIT)
6921 function_arg_advance_64 (cum, mode, type, words, named);
6922 else
6923 function_arg_advance_32 (cum, mode, type, bytes, words);
6926 /* Define where to put the arguments to a function.
6927 Value is zero to push the argument on the stack,
6928 or a hard register in which to store the argument.
6930 MODE is the argument's machine mode.
6931 TYPE is the data type of the argument (as a tree).
6932 This is null for libcalls where that information may
6933 not be available.
6934 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6935 the preceding args and about the function being called.
6936 NAMED is nonzero if this argument is a named parameter
6937 (otherwise it is an extra parameter matching an ellipsis). */
6939 static rtx
6940 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6941 enum machine_mode orig_mode, const_tree type,
6942 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6944 static bool warnedsse, warnedmmx;
6946 /* Avoid the AL settings for the Unix64 ABI. */
6947 if (mode == VOIDmode)
6948 return constm1_rtx;
6950 switch (mode)
6952 default:
6953 break;
6955 case BLKmode:
6956 if (bytes < 0)
6957 break;
6958 /* FALLTHRU */
6959 case DImode:
6960 case SImode:
6961 case HImode:
6962 case QImode:
6963 if (words <= cum->nregs)
6965 int regno = cum->regno;
6967 /* Fastcall allocates the first two DWORD (SImode) or
6968 smaller arguments to ECX and EDX if the argument
6969 isn't an aggregate type. */
6970 if (cum->fastcall)
6972 if (mode == BLKmode
6973 || mode == DImode
6974 || (type && AGGREGATE_TYPE_P (type)))
6975 break;
6977 /* ECX not EAX is the first allocated register. */
6978 if (regno == AX_REG)
6979 regno = CX_REG;
6981 return gen_rtx_REG (mode, regno);
6983 break;
6985 case DFmode:
6986 if (cum->float_in_sse < 2)
6987 break;
6988 case SFmode:
6989 if (cum->float_in_sse < 1)
6990 break;
6991 /* FALLTHRU */
6992 case TImode:
6993 /* In 32bit, we pass TImode in xmm registers. */
6994 case V16QImode:
6995 case V8HImode:
6996 case V4SImode:
6997 case V2DImode:
6998 case V4SFmode:
6999 case V2DFmode:
7000 if (!type || !AGGREGATE_TYPE_P (type))
7002 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
7004 warnedsse = true;
7005 warning (0, "SSE vector argument without SSE enabled "
7006 "changes the ABI");
7008 if (cum->sse_nregs)
7009 return gen_reg_or_parallel (mode, orig_mode,
7010 cum->sse_regno + FIRST_SSE_REG);
7012 break;
7014 case OImode:
7015 /* OImode shouldn't be used directly. */
7016 gcc_unreachable ();
7018 case V8SFmode:
7019 case V8SImode:
7020 case V32QImode:
7021 case V16HImode:
7022 case V4DFmode:
7023 case V4DImode:
7024 if (!type || !AGGREGATE_TYPE_P (type))
7026 if (cum->sse_nregs)
7027 return gen_reg_or_parallel (mode, orig_mode,
7028 cum->sse_regno + FIRST_SSE_REG);
7030 break;
7032 case V8QImode:
7033 case V4HImode:
7034 case V2SImode:
7035 case V2SFmode:
7036 case V1TImode:
7037 case V1DImode:
7038 if (!type || !AGGREGATE_TYPE_P (type))
7040 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
7042 warnedmmx = true;
7043 warning (0, "MMX vector argument without MMX enabled "
7044 "changes the ABI");
7046 if (cum->mmx_nregs)
7047 return gen_reg_or_parallel (mode, orig_mode,
7048 cum->mmx_regno + FIRST_MMX_REG);
7050 break;
7053 return NULL_RTX;
7056 static rtx
7057 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7058 enum machine_mode orig_mode, const_tree type, bool named)
7060 /* Handle a hidden AL argument containing number of registers
7061 for varargs x86-64 functions. */
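/* Example of the hidden %al value (sketch, not part of this file): for

     printf ("%f\n", 3.5);

   one SSE register is used to pass the double, so the call is typically
   preceded by "movl $1, %eax"; a varargs call with no floating-point
   arguments normally sets %al to 0.  */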
7062 if (mode == VOIDmode)
7063 return GEN_INT (cum->maybe_vaarg
7064 ? (cum->sse_nregs < 0
7065 ? X86_64_SSE_REGPARM_MAX
7066 : cum->sse_regno)
7067 : -1);
7069 switch (mode)
7071 default:
7072 break;
7074 case V8SFmode:
7075 case V8SImode:
7076 case V32QImode:
7077 case V16HImode:
7078 case V4DFmode:
7079 case V4DImode:
7080 /* Unnamed 256bit vector mode parameters are passed on stack. */
7081 if (!named)
7082 return NULL;
7083 break;
7086 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7087 cum->sse_nregs,
7088 &x86_64_int_parameter_registers [cum->regno],
7089 cum->sse_regno);
7092 static rtx
7093 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7094 enum machine_mode orig_mode, bool named,
7095 HOST_WIDE_INT bytes)
7097 unsigned int regno;
7099 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7100 We use the value -2 to specify that the current function call is MS ABI. */
7101 if (mode == VOIDmode)
7102 return GEN_INT (-2);
7104 /* If we've run out of registers, it goes on the stack. */
7105 if (cum->nregs == 0)
7106 return NULL_RTX;
7108 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7110 /* Only floating point modes are passed in anything but integer regs. */
7111 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7113 if (named)
7114 regno = cum->regno + FIRST_SSE_REG;
7115 else
7117 rtx t1, t2;
7119 /* Unnamed floating parameters are passed in both the
7120 SSE and integer registers. */
7121 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7122 t2 = gen_rtx_REG (mode, regno);
7123 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7124 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7125 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7128 /* Handle aggregate types passed in registers. */
7129 if (orig_mode == BLKmode)
7131 if (bytes > 0 && bytes <= 8)
7132 mode = (bytes > 4 ? DImode : SImode);
7133 if (mode == BLKmode)
7134 mode = DImode;
7137 return gen_reg_or_parallel (mode, orig_mode, regno);
7140 /* Return where to put the arguments to a function.
7141 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7143 MODE is the argument's machine mode. TYPE is the data type of the
7144 argument. It is null for libcalls where that information may not be
7145 available. CUM gives information about the preceding args and about
7146 the function being called. NAMED is nonzero if this argument is a
7147 named parameter (otherwise it is an extra parameter matching an
7148 ellipsis). */
7150 static rtx
7151 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
7152 const_tree type, bool named)
7154 enum machine_mode mode = omode;
7155 HOST_WIDE_INT bytes, words;
7156 rtx arg;
7158 if (mode == BLKmode)
7159 bytes = int_size_in_bytes (type);
7160 else
7161 bytes = GET_MODE_SIZE (mode);
7162 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7164 /* To simplify the code below, represent vector types with a vector mode
7165 even if MMX/SSE are not active. */
7166 if (type && TREE_CODE (type) == VECTOR_TYPE)
7167 mode = type_natural_mode (type, cum);
7169 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7170 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7171 else if (TARGET_64BIT)
7172 arg = function_arg_64 (cum, mode, omode, type, named);
7173 else
7174 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7176 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
7178 /* This argument uses 256bit AVX modes. */
7179 if (cum->caller)
7180 cfun->machine->callee_pass_avx256_p = true;
7181 else
7182 cfun->machine->caller_pass_avx256_p = true;
7185 return arg;
7188 /* A C expression that indicates when an argument must be passed by
7189 reference. If nonzero for an argument, a copy of that argument is
7190 made in memory and a pointer to the argument is passed instead of
7191 the argument itself. The pointer is passed in whatever way is
7192 appropriate for passing a pointer to that type. */
7194 static bool
7195 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
7196 enum machine_mode mode ATTRIBUTE_UNUSED,
7197 const_tree type, bool named ATTRIBUTE_UNUSED)
7199 /* See Windows x64 Software Convention. */
7200 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7202 int msize = (int) GET_MODE_SIZE (mode);
7203 if (type)
7205 /* Arrays are passed by reference. */
7206 if (TREE_CODE (type) == ARRAY_TYPE)
7207 return true;
7209 if (AGGREGATE_TYPE_P (type))
7211 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7212 are passed by reference. */
7213 msize = int_size_in_bytes (type);
7217 /* __m128 is passed by reference. */
7218 switch (msize) {
7219 case 1: case 2: case 4: case 8:
7220 break;
7221 default:
7222 return true;
7225 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7226 return 1;
7228 return 0;
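/* Illustrative sketch (not part of GCC): the Windows x64 rule applied
   above, restated as a stand-alone predicate.  SIZE is in bytes and
   IS_ARRAY says whether the type is an array; both names are
   hypothetical.  */

static bool
win64_by_reference_example (HOST_WIDE_INT size, bool is_array)
{
  if (is_array)
    return true;		/* Arrays are always passed by reference.  */
  switch (size)
    {
    case 1: case 2: case 4: case 8:
      return false;		/* Fits in a register, passed by value.  */
    default:
      return true;		/* e.g. __m128 or a 24-byte struct.  */
    }
}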
7231 /* Return true when TYPE should be 128bit aligned for 32bit argument
7232 passing ABI. XXX: This function is obsolete and is only used for
7233 checking psABI compatibility with previous versions of GCC. */
7235 static bool
7236 ix86_compat_aligned_value_p (const_tree type)
7238 enum machine_mode mode = TYPE_MODE (type);
7239 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7240 || mode == TDmode
7241 || mode == TFmode
7242 || mode == TCmode)
7243 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7244 return true;
7245 if (TYPE_ALIGN (type) < 128)
7246 return false;
7248 if (AGGREGATE_TYPE_P (type))
7250 /* Walk the aggregates recursively. */
7251 switch (TREE_CODE (type))
7253 case RECORD_TYPE:
7254 case UNION_TYPE:
7255 case QUAL_UNION_TYPE:
7257 tree field;
7259 /* Walk all the structure fields. */
7260 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7262 if (TREE_CODE (field) == FIELD_DECL
7263 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7264 return true;
7266 break;
7269 case ARRAY_TYPE:
7270 /* Just for use if some languages pass arrays by value. */
7271 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7272 return true;
7273 break;
7275 default:
7276 gcc_unreachable ();
7279 return false;
7282 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7283 XXX: This function is obsolete and is only used for checking psABI
7284 compatibility with previous versions of GCC. */
7286 static unsigned int
7287 ix86_compat_function_arg_boundary (enum machine_mode mode,
7288 const_tree type, unsigned int align)
7290 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7291 natural boundaries. */
7292 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7294 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7295 make an exception for SSE modes since these require 128bit
7296 alignment.
7298 The handling here differs from field_alignment. ICC aligns MMX
7299 arguments to 4 byte boundaries, while structure fields are aligned
7300 to 8 byte boundaries. */
7301 if (!type)
7303 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7304 align = PARM_BOUNDARY;
7306 else
7308 if (!ix86_compat_aligned_value_p (type))
7309 align = PARM_BOUNDARY;
7312 if (align > BIGGEST_ALIGNMENT)
7313 align = BIGGEST_ALIGNMENT;
7314 return align;
7317 /* Return true when TYPE should be 128bit aligned for 32bit argument
7318 passing ABI. */
7320 static bool
7321 ix86_contains_aligned_value_p (const_tree type)
7323 enum machine_mode mode = TYPE_MODE (type);
7325 if (mode == XFmode || mode == XCmode)
7326 return false;
7328 if (TYPE_ALIGN (type) < 128)
7329 return false;
7331 if (AGGREGATE_TYPE_P (type))
7333 /* Walk the aggregates recursively. */
7334 switch (TREE_CODE (type))
7336 case RECORD_TYPE:
7337 case UNION_TYPE:
7338 case QUAL_UNION_TYPE:
7340 tree field;
7342 /* Walk all the structure fields. */
7343 for (field = TYPE_FIELDS (type);
7344 field;
7345 field = DECL_CHAIN (field))
7347 if (TREE_CODE (field) == FIELD_DECL
7348 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7349 return true;
7351 break;
7354 case ARRAY_TYPE:
7355 /* Just for use if some languages pass arrays by value. */
7356 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7357 return true;
7358 break;
7360 default:
7361 gcc_unreachable ();
7364 else
7365 return TYPE_ALIGN (type) >= 128;
7367 return false;
7370 /* Gives the alignment boundary, in bits, of an argument with the
7371 specified mode and type. */
7373 static unsigned int
7374 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7376 unsigned int align;
7377 if (type)
7379 /* Since the main variant type is used for the call, convert TYPE
7380 to its main variant. */
7381 type = TYPE_MAIN_VARIANT (type);
7382 align = TYPE_ALIGN (type);
7384 else
7385 align = GET_MODE_ALIGNMENT (mode);
7386 if (align < PARM_BOUNDARY)
7387 align = PARM_BOUNDARY;
7388 else
7390 static bool warned;
7391 unsigned int saved_align = align;
7393 if (!TARGET_64BIT)
7395 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7396 if (!type)
7398 if (mode == XFmode || mode == XCmode)
7399 align = PARM_BOUNDARY;
7401 else if (!ix86_contains_aligned_value_p (type))
7402 align = PARM_BOUNDARY;
7404 if (align < 128)
7405 align = PARM_BOUNDARY;
7408 if (warn_psabi
7409 && !warned
7410 && align != ix86_compat_function_arg_boundary (mode, type,
7411 saved_align))
7413 warned = true;
7414 inform (input_location,
7415 "The ABI for passing parameters with %d-byte"
7416 " alignment has changed in GCC 4.6",
7417 align / BITS_PER_UNIT);
7421 return align;
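/* Worked example (illustrative only), for the 32-bit ABI handled above
   with PARM_BOUNDARY == 32:
     - a "double" argument:   TYPE_ALIGN is 64 < 128, so it is passed
       with PARM_BOUNDARY, i.e. 32-bit alignment;
     - an "__m128" argument:  TYPE_ALIGN is 128 and the type contains an
       aligned value, so it keeps its 128-bit alignment;
     - a "long double" (XFmode) argument with no type node is forced to
       PARM_BOUNDARY, matching the i386 psABI.  */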
7424 /* Return true if REGNO is a possible register number for a function's return value. */
7426 static bool
7427 ix86_function_value_regno_p (const unsigned int regno)
7429 switch (regno)
7431 case 0:
7432 return true;
7434 case FIRST_FLOAT_REG:
7435 /* TODO: The function should depend on the current function's ABI, but
7436 builtins.c would need updating then. Therefore we use the
7437 default ABI. */
7438 if (TARGET_64BIT && ix86_abi == MS_ABI)
7439 return false;
7440 return TARGET_FLOAT_RETURNS_IN_80387;
7442 case FIRST_SSE_REG:
7443 return TARGET_SSE;
7445 case FIRST_MMX_REG:
7446 if (TARGET_MACHO || TARGET_64BIT)
7447 return false;
7448 return TARGET_MMX;
7451 return false;
7454 /* Define how to find the value returned by a function.
7455 VALTYPE is the data type of the value (as a tree).
7456 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7457 otherwise, FUNC is 0. */
7459 static rtx
7460 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7461 const_tree fntype, const_tree fn)
7463 unsigned int regno;
7465 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7466 we normally prevent this case when mmx is not available. However
7467 some ABIs may require the result to be returned like DImode. */
7468 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7469 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
7471 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7472 we prevent this case when sse is not available. However some ABIs
7473 may require the result to be returned like integer TImode. */
7474 else if (mode == TImode
7475 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7476 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
7478 /* 32-byte vector modes in %ymm0. */
7479 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7480 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
7482 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7483 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7484 regno = FIRST_FLOAT_REG;
7485 else
7486 /* Most things go in %eax. */
7487 regno = AX_REG;
7489 /* Override FP return register with %xmm0 for local functions when
7490 SSE math is enabled or for functions with sseregparm attribute. */
7491 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7493 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7494 if ((sse_level >= 1 && mode == SFmode)
7495 || (sse_level == 2 && mode == DFmode))
7496 regno = FIRST_SSE_REG;
7499 /* OImode shouldn't be used directly. */
7500 gcc_assert (mode != OImode);
7502 return gen_rtx_REG (orig_mode, regno);
7505 static rtx
7506 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7507 const_tree valtype)
7509 rtx ret;
7511 /* Handle libcalls, which don't provide a type node. */
7512 if (valtype == NULL)
7514 switch (mode)
7516 case SFmode:
7517 case SCmode:
7518 case DFmode:
7519 case DCmode:
7520 case TFmode:
7521 case SDmode:
7522 case DDmode:
7523 case TDmode:
7524 return gen_rtx_REG (mode, FIRST_SSE_REG);
7525 case XFmode:
7526 case XCmode:
7527 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
7528 case TCmode:
7529 return NULL;
7530 default:
7531 return gen_rtx_REG (mode, AX_REG);
7535 ret = construct_container (mode, orig_mode, valtype, 1,
7536 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7537 x86_64_int_return_registers, 0);
7539 /* For zero sized structures, construct_container returns NULL, but we
7540 need to keep the rest of the compiler happy by returning a meaningful value. */
7541 if (!ret)
7542 ret = gen_rtx_REG (orig_mode, AX_REG);
7544 return ret;
7547 static rtx
7548 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7550 unsigned int regno = AX_REG;
7552 if (TARGET_SSE)
7554 switch (GET_MODE_SIZE (mode))
7556 case 16:
7557 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7558 && !COMPLEX_MODE_P (mode))
7559 regno = FIRST_SSE_REG;
7560 break;
7561 case 8:
7562 case 4:
7563 if (mode == SFmode || mode == DFmode)
7564 regno = FIRST_SSE_REG;
7565 break;
7566 default:
7567 break;
7570 return gen_rtx_REG (orig_mode, regno);
7573 static rtx
7574 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7575 enum machine_mode orig_mode, enum machine_mode mode)
7577 const_tree fn, fntype;
7579 fn = NULL_TREE;
7580 if (fntype_or_decl && DECL_P (fntype_or_decl))
7581 fn = fntype_or_decl;
7582 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7584 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7585 return function_value_ms_64 (orig_mode, mode);
7586 else if (TARGET_64BIT)
7587 return function_value_64 (orig_mode, mode, valtype);
7588 else
7589 return function_value_32 (orig_mode, mode, fntype, fn);
7592 static rtx
7593 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7594 bool outgoing ATTRIBUTE_UNUSED)
7596 enum machine_mode mode, orig_mode;
7598 orig_mode = TYPE_MODE (valtype);
7599 mode = type_natural_mode (valtype, NULL);
7600 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7604 ix86_libcall_value (enum machine_mode mode)
7606 return ix86_function_value_1 (NULL, NULL, mode, mode);
7609 /* Return true iff type is returned in memory. */
7611 static bool ATTRIBUTE_UNUSED
7612 return_in_memory_32 (const_tree type, enum machine_mode mode)
7614 HOST_WIDE_INT size;
7616 if (mode == BLKmode)
7617 return true;
7619 size = int_size_in_bytes (type);
7621 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7622 return false;
7624 if (VECTOR_MODE_P (mode) || mode == TImode)
7626 /* User-created vectors small enough to fit in EAX. */
7627 if (size < 8)
7628 return false;
7630 /* MMX/3dNow values are returned in MM0,
7631 except when it doesn't exist or the ABI prescribes otherwise. */
7632 if (size == 8)
7633 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7635 /* SSE values are returned in XMM0, except when it doesn't exist. */
7636 if (size == 16)
7637 return !TARGET_SSE;
7639 /* AVX values are returned in YMM0, except when it doesn't exist. */
7640 if (size == 32)
7641 return !TARGET_AVX;
7644 if (mode == XFmode)
7645 return false;
7647 if (size > 12)
7648 return true;
7650 /* OImode shouldn't be used directly. */
7651 gcc_assert (mode != OImode);
7653 return false;
7656 static bool ATTRIBUTE_UNUSED
7657 return_in_memory_64 (const_tree type, enum machine_mode mode)
7659 int needed_intregs, needed_sseregs;
7660 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7663 static bool ATTRIBUTE_UNUSED
7664 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7666 HOST_WIDE_INT size = int_size_in_bytes (type);
7668 /* __m128 is returned in xmm0. */
7669 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7670 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7671 return false;
7673 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
7674 return size != 1 && size != 2 && size != 4 && size != 8;
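/* Illustrative sketch (not part of GCC) of the Win64 rule above: a
   return value lives in a register iff it is an __m128-style 16-byte
   vector/integer mode, or its size is exactly 1, 2, 4 or 8 bytes;
   everything else is returned via a hidden memory pointer.  The names
   below are hypothetical.  */

static bool
win64_return_in_memory_example (HOST_WIDE_INT size, bool is_m128_like)
{
  if (is_m128_like)
    return false;		/* Returned in %xmm0.  */
  return size != 1 && size != 2 && size != 4 && size != 8;
}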
7677 static bool
7678 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7680 #ifdef SUBTARGET_RETURN_IN_MEMORY
7681 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7682 #else
7683 const enum machine_mode mode = type_natural_mode (type, NULL);
7685 if (TARGET_64BIT)
7687 if (ix86_function_type_abi (fntype) == MS_ABI)
7688 return return_in_memory_ms_64 (type, mode);
7689 else
7690 return return_in_memory_64 (type, mode);
7692 else
7693 return return_in_memory_32 (type, mode);
7694 #endif
7697 /* When returning SSE vector types, we have a choice of either
7698 (1) being abi incompatible with a -march switch, or
7699 (2) generating an error.
7700 Given no good solution, I think the safest thing is one warning.
7701 The user won't be able to use -Werror, but....
7703 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7704 called in response to actually generating a caller or callee that
7705 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7706 via aggregate_value_p for general type probing from tree-ssa. */
7708 static rtx
7709 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7711 static bool warnedsse, warnedmmx;
7713 if (!TARGET_64BIT && type)
7715 /* Look at the return type of the function, not the function type. */
7716 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7718 if (!TARGET_SSE && !warnedsse)
7720 if (mode == TImode
7721 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7723 warnedsse = true;
7724 warning (0, "SSE vector return without SSE enabled "
7725 "changes the ABI");
7729 if (!TARGET_MMX && !warnedmmx)
7731 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7733 warnedmmx = true;
7734 warning (0, "MMX vector return without MMX enabled "
7735 "changes the ABI");
7740 return NULL;
7744 /* Create the va_list data type. */
7746 /* Returns the calling convention specific va_list data type.
7747 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7749 static tree
7750 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7752 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7754 /* For i386 we use plain pointer to argument area. */
7755 if (!TARGET_64BIT || abi == MS_ABI)
7756 return build_pointer_type (char_type_node);
7758 record = lang_hooks.types.make_type (RECORD_TYPE);
7759 type_decl = build_decl (BUILTINS_LOCATION,
7760 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7762 f_gpr = build_decl (BUILTINS_LOCATION,
7763 FIELD_DECL, get_identifier ("gp_offset"),
7764 unsigned_type_node);
7765 f_fpr = build_decl (BUILTINS_LOCATION,
7766 FIELD_DECL, get_identifier ("fp_offset"),
7767 unsigned_type_node);
7768 f_ovf = build_decl (BUILTINS_LOCATION,
7769 FIELD_DECL, get_identifier ("overflow_arg_area"),
7770 ptr_type_node);
7771 f_sav = build_decl (BUILTINS_LOCATION,
7772 FIELD_DECL, get_identifier ("reg_save_area"),
7773 ptr_type_node);
7775 va_list_gpr_counter_field = f_gpr;
7776 va_list_fpr_counter_field = f_fpr;
7778 DECL_FIELD_CONTEXT (f_gpr) = record;
7779 DECL_FIELD_CONTEXT (f_fpr) = record;
7780 DECL_FIELD_CONTEXT (f_ovf) = record;
7781 DECL_FIELD_CONTEXT (f_sav) = record;
7783 TYPE_STUB_DECL (record) = type_decl;
7784 TYPE_NAME (record) = type_decl;
7785 TYPE_FIELDS (record) = f_gpr;
7786 DECL_CHAIN (f_gpr) = f_fpr;
7787 DECL_CHAIN (f_fpr) = f_ovf;
7788 DECL_CHAIN (f_ovf) = f_sav;
7790 layout_type (record);
7792 /* The correct type is an array type of one element. */
7793 return build_array_type (record, build_index_type (size_zero_node));
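/* For reference only (illustrative, not used by the compiler): the
   record built above corresponds to this C-level layout from the
   x86-64 psABI; __builtin_va_list is a one-element array of it.  */

typedef struct
{
  unsigned int gp_offset;	/* 0 .. 48, next GP slot in reg_save_area.  */
  unsigned int fp_offset;	/* 48 .. 176, next SSE slot.  */
  void *overflow_arg_area;	/* Next argument passed on the stack.  */
  void *reg_save_area;		/* Register save area set up by the prologue.  */
} example_va_list_tag;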
7796 /* Setup the builtin va_list data type and for 64-bit the additional
7797 calling convention specific va_list data types. */
7799 static tree
7800 ix86_build_builtin_va_list (void)
7802 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7804 /* Initialize abi specific va_list builtin types. */
7805 if (TARGET_64BIT)
7807 tree t;
7808 if (ix86_abi == MS_ABI)
7810 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7811 if (TREE_CODE (t) != RECORD_TYPE)
7812 t = build_variant_type_copy (t);
7813 sysv_va_list_type_node = t;
7815 else
7817 t = ret;
7818 if (TREE_CODE (t) != RECORD_TYPE)
7819 t = build_variant_type_copy (t);
7820 sysv_va_list_type_node = t;
7822 if (ix86_abi != MS_ABI)
7824 t = ix86_build_builtin_va_list_abi (MS_ABI);
7825 if (TREE_CODE (t) != RECORD_TYPE)
7826 t = build_variant_type_copy (t);
7827 ms_va_list_type_node = t;
7829 else
7831 t = ret;
7832 if (TREE_CODE (t) != RECORD_TYPE)
7833 t = build_variant_type_copy (t);
7834 ms_va_list_type_node = t;
7838 return ret;
7841 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7843 static void
7844 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7846 rtx save_area, mem;
7847 alias_set_type set;
7848 int i, max;
7850 /* GPR size of varargs save area. */
7851 if (cfun->va_list_gpr_size)
7852 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7853 else
7854 ix86_varargs_gpr_size = 0;
7856 /* FPR size of varargs save area. We don't need it if we don't pass
7857 anything in SSE registers. */
7858 if (TARGET_SSE && cfun->va_list_fpr_size)
7859 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7860 else
7861 ix86_varargs_fpr_size = 0;
7863 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7864 return;
7866 save_area = frame_pointer_rtx;
7867 set = get_varargs_alias_set ();
7869 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7870 if (max > X86_64_REGPARM_MAX)
7871 max = X86_64_REGPARM_MAX;
7873 for (i = cum->regno; i < max; i++)
7875 mem = gen_rtx_MEM (Pmode,
7876 plus_constant (save_area, i * UNITS_PER_WORD));
7877 MEM_NOTRAP_P (mem) = 1;
7878 set_mem_alias_set (mem, set);
7879 emit_move_insn (mem, gen_rtx_REG (Pmode,
7880 x86_64_int_parameter_registers[i]));
7883 if (ix86_varargs_fpr_size)
7885 enum machine_mode smode;
7886 rtx label, test;
7888 /* Now emit code to save SSE registers. The AX parameter contains the number
7889 of SSE parameter registers used to call this function, though all we
7890 actually check here is the zero/non-zero status. */
7892 label = gen_label_rtx ();
7893 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7894 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7895 label));
7897 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7898 we used movdqa (i.e. TImode) instead? Perhaps even better would
7899 be if we could determine the real mode of the data, via a hook
7900 into pass_stdarg. Ignore all that for now. */
7901 smode = V4SFmode;
7902 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7903 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7905 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7906 if (max > X86_64_SSE_REGPARM_MAX)
7907 max = X86_64_SSE_REGPARM_MAX;
7909 for (i = cum->sse_regno; i < max; ++i)
7911 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7912 mem = gen_rtx_MEM (smode, mem);
7913 MEM_NOTRAP_P (mem) = 1;
7914 set_mem_alias_set (mem, set);
7915 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7917 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7920 emit_label (label);
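/* Illustrative only: total size of the SysV register save area laid
   out by the code above, assuming the usual X86_64_REGPARM_MAX == 6
   and X86_64_SSE_REGPARM_MAX == 8.  */

enum
{
  EXAMPLE_GPR_SAVE_BYTES = 6 * 8,	/* rdi, rsi, rdx, rcx, r8, r9.  */
  EXAMPLE_FPR_SAVE_BYTES = 8 * 16,	/* xmm0 .. xmm7.  */
  EXAMPLE_SAVE_AREA_BYTES
    = EXAMPLE_GPR_SAVE_BYTES + EXAMPLE_FPR_SAVE_BYTES	/* 176 bytes.  */
};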
7924 static void
7925 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7927 alias_set_type set = get_varargs_alias_set ();
7928 int i;
7930 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7932 rtx reg, mem;
7934 mem = gen_rtx_MEM (Pmode,
7935 plus_constant (virtual_incoming_args_rtx,
7936 i * UNITS_PER_WORD));
7937 MEM_NOTRAP_P (mem) = 1;
7938 set_mem_alias_set (mem, set);
7940 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7941 emit_move_insn (mem, reg);
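/* Note (descriptive only): on the MS ABI the caller always reserves a
   32-byte register parameter area ("home space") right above the
   return address; the loop above spills the still-unnamed rcx, rdx,
   r8 and r9 into their home slots so va_arg can walk the stack
   linearly.  */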
7945 static void
7946 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7947 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7948 int no_rtl)
7950 CUMULATIVE_ARGS next_cum;
7951 tree fntype;
7953 /* This argument doesn't appear to be used anymore. Which is good,
7954 because the old code here didn't suppress rtl generation. */
7955 gcc_assert (!no_rtl);
7957 if (!TARGET_64BIT)
7958 return;
7960 fntype = TREE_TYPE (current_function_decl);
7962 /* For varargs, we do not want to skip the dummy va_dcl argument.
7963 For stdargs, we do want to skip the last named argument. */
7964 next_cum = *cum;
7965 if (stdarg_p (fntype))
7966 ix86_function_arg_advance (&next_cum, mode, type, true);
7968 if (cum->call_abi == MS_ABI)
7969 setup_incoming_varargs_ms_64 (&next_cum);
7970 else
7971 setup_incoming_varargs_64 (&next_cum);
7974 /* Checks whether TYPE is the char * kind of va_list. */
7976 static bool
7977 is_va_list_char_pointer (tree type)
7979 tree canonic;
7981 /* For 32-bit it is always true. */
7982 if (!TARGET_64BIT)
7983 return true;
7984 canonic = ix86_canonical_va_list_type (type);
7985 return (canonic == ms_va_list_type_node
7986 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7989 /* Implement va_start. */
7991 static void
7992 ix86_va_start (tree valist, rtx nextarg)
7994 HOST_WIDE_INT words, n_gpr, n_fpr;
7995 tree f_gpr, f_fpr, f_ovf, f_sav;
7996 tree gpr, fpr, ovf, sav, t;
7997 tree type;
7998 rtx ovf_rtx;
8000 if (flag_split_stack
8001 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8003 unsigned int scratch_regno;
8005 /* When we are splitting the stack, we can't refer to the stack
8006 arguments using internal_arg_pointer, because they may be on
8007 the old stack. The split stack prologue will arrange to
8008 leave a pointer to the old stack arguments in a scratch
8009 register, which we here copy to a pseudo-register. The split
8010 stack prologue can't set the pseudo-register directly because
8011 it (the prologue) runs before any registers have been saved. */
8013 scratch_regno = split_stack_prologue_scratch_regno ();
8014 if (scratch_regno != INVALID_REGNUM)
8016 rtx reg, seq;
8018 reg = gen_reg_rtx (Pmode);
8019 cfun->machine->split_stack_varargs_pointer = reg;
8021 start_sequence ();
8022 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8023 seq = get_insns ();
8024 end_sequence ();
8026 push_topmost_sequence ();
8027 emit_insn_after (seq, entry_of_function ());
8028 pop_topmost_sequence ();
8032 /* Only 64bit target needs something special. */
8033 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8035 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8036 std_expand_builtin_va_start (valist, nextarg);
8037 else
8039 rtx va_r, next;
8041 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8042 next = expand_binop (ptr_mode, add_optab,
8043 cfun->machine->split_stack_varargs_pointer,
8044 crtl->args.arg_offset_rtx,
8045 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8046 convert_move (va_r, next, 0);
8048 return;
8051 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8052 f_fpr = DECL_CHAIN (f_gpr);
8053 f_ovf = DECL_CHAIN (f_fpr);
8054 f_sav = DECL_CHAIN (f_ovf);
8056 valist = build_simple_mem_ref (valist);
8057 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8058 /* The following should be folded into the MEM_REF offset. */
8059 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8060 f_gpr, NULL_TREE);
8061 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8062 f_fpr, NULL_TREE);
8063 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8064 f_ovf, NULL_TREE);
8065 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8066 f_sav, NULL_TREE);
8068 /* Count number of gp and fp argument registers used. */
8069 words = crtl->args.info.words;
8070 n_gpr = crtl->args.info.regno;
8071 n_fpr = crtl->args.info.sse_regno;
8073 if (cfun->va_list_gpr_size)
8075 type = TREE_TYPE (gpr);
8076 t = build2 (MODIFY_EXPR, type,
8077 gpr, build_int_cst (type, n_gpr * 8));
8078 TREE_SIDE_EFFECTS (t) = 1;
8079 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8082 if (TARGET_SSE && cfun->va_list_fpr_size)
8084 type = TREE_TYPE (fpr);
8085 t = build2 (MODIFY_EXPR, type, fpr,
8086 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8087 TREE_SIDE_EFFECTS (t) = 1;
8088 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8091 /* Find the overflow area. */
8092 type = TREE_TYPE (ovf);
8093 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8094 ovf_rtx = crtl->args.internal_arg_pointer;
8095 else
8096 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8097 t = make_tree (type, ovf_rtx);
8098 if (words != 0)
8099 t = build2 (POINTER_PLUS_EXPR, type, t,
8100 size_int (words * UNITS_PER_WORD));
8101 t = build2 (MODIFY_EXPR, type, ovf, t);
8102 TREE_SIDE_EFFECTS (t) = 1;
8103 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8105 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8107 /* Find the register save area.
8108 The function prologue saves it right above the stack frame. */
8109 type = TREE_TYPE (sav);
8110 t = make_tree (type, frame_pointer_rtx);
8111 if (!ix86_varargs_gpr_size)
8112 t = build2 (POINTER_PLUS_EXPR, type, t,
8113 size_int (-8 * X86_64_REGPARM_MAX));
8114 t = build2 (MODIFY_EXPR, type, sav, t);
8115 TREE_SIDE_EFFECTS (t) = 1;
8116 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
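/* Worked example (illustrative only): for "int f (int a, double b, ...)"
   one GP and one SSE register are consumed by the named arguments, so
   the assignments above initialize the va_list counters roughly as
   follows (assuming X86_64_REGPARM_MAX == 6).  */

enum
{
  EXAMPLE_GP_OFFSET = 1 * 8,		/* 8: next GP vararg comes from slot 1.  */
  EXAMPLE_FP_OFFSET = 1 * 16 + 6 * 8	/* 64: next SSE vararg from SSE slot 1.  */
};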
8120 /* Implement va_arg. */
8122 static tree
8123 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8124 gimple_seq *post_p)
8126 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8127 tree f_gpr, f_fpr, f_ovf, f_sav;
8128 tree gpr, fpr, ovf, sav, t;
8129 int size, rsize;
8130 tree lab_false, lab_over = NULL_TREE;
8131 tree addr, t2;
8132 rtx container;
8133 int indirect_p = 0;
8134 tree ptrtype;
8135 enum machine_mode nat_mode;
8136 unsigned int arg_boundary;
8138 /* Only 64bit target needs something special. */
8139 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8140 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8142 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8143 f_fpr = DECL_CHAIN (f_gpr);
8144 f_ovf = DECL_CHAIN (f_fpr);
8145 f_sav = DECL_CHAIN (f_ovf);
8147 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8148 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8149 valist = build_va_arg_indirect_ref (valist);
8150 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8151 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8152 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8154 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8155 if (indirect_p)
8156 type = build_pointer_type (type);
8157 size = int_size_in_bytes (type);
8158 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8160 nat_mode = type_natural_mode (type, NULL);
8161 switch (nat_mode)
8163 case V8SFmode:
8164 case V8SImode:
8165 case V32QImode:
8166 case V16HImode:
8167 case V4DFmode:
8168 case V4DImode:
8169 /* Unnamed 256bit vector mode parameters are passed on stack. */
8170 if (!TARGET_64BIT_MS_ABI)
8172 container = NULL;
8173 break;
8176 default:
8177 container = construct_container (nat_mode, TYPE_MODE (type),
8178 type, 0, X86_64_REGPARM_MAX,
8179 X86_64_SSE_REGPARM_MAX, intreg,
8181 break;
8184 /* Pull the value out of the saved registers. */
8186 addr = create_tmp_var (ptr_type_node, "addr");
8188 if (container)
8190 int needed_intregs, needed_sseregs;
8191 bool need_temp;
8192 tree int_addr, sse_addr;
8194 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8195 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8197 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8199 need_temp = (!REG_P (container)
8200 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8201 || TYPE_ALIGN (type) > 128));
8203 /* In case we are passing a structure, verify that it is a consecutive block
8204 in the register save area. If not, we need to do moves. */
8205 if (!need_temp && !REG_P (container))
8207 /* Verify that all registers are strictly consecutive */
8208 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8210 int i;
8212 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8214 rtx slot = XVECEXP (container, 0, i);
8215 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8216 || INTVAL (XEXP (slot, 1)) != i * 16)
8217 need_temp = 1;
8220 else
8222 int i;
8224 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8226 rtx slot = XVECEXP (container, 0, i);
8227 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8228 || INTVAL (XEXP (slot, 1)) != i * 8)
8229 need_temp = 1;
8233 if (!need_temp)
8235 int_addr = addr;
8236 sse_addr = addr;
8238 else
8240 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8241 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8244 /* First ensure that we fit completely in registers. */
8245 if (needed_intregs)
8247 t = build_int_cst (TREE_TYPE (gpr),
8248 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8249 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8250 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8251 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8252 gimplify_and_add (t, pre_p);
8254 if (needed_sseregs)
8256 t = build_int_cst (TREE_TYPE (fpr),
8257 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8258 + X86_64_REGPARM_MAX * 8);
8259 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8260 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8261 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8262 gimplify_and_add (t, pre_p);
8265 /* Compute index to start of area used for integer regs. */
8266 if (needed_intregs)
8268 /* int_addr = gpr + sav; */
8269 t = fold_convert (sizetype, gpr);
8270 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8271 gimplify_assign (int_addr, t, pre_p);
8273 if (needed_sseregs)
8275 /* sse_addr = fpr + sav; */
8276 t = fold_convert (sizetype, fpr);
8277 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8278 gimplify_assign (sse_addr, t, pre_p);
8280 if (need_temp)
8282 int i, prev_size = 0;
8283 tree temp = create_tmp_var (type, "va_arg_tmp");
8285 /* addr = &temp; */
8286 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8287 gimplify_assign (addr, t, pre_p);
8289 for (i = 0; i < XVECLEN (container, 0); i++)
8291 rtx slot = XVECEXP (container, 0, i);
8292 rtx reg = XEXP (slot, 0);
8293 enum machine_mode mode = GET_MODE (reg);
8294 tree piece_type;
8295 tree addr_type;
8296 tree daddr_type;
8297 tree src_addr, src;
8298 int src_offset;
8299 tree dest_addr, dest;
8300 int cur_size = GET_MODE_SIZE (mode);
8302 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8303 prev_size = INTVAL (XEXP (slot, 1));
8304 if (prev_size + cur_size > size)
8306 cur_size = size - prev_size;
8307 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8308 if (mode == BLKmode)
8309 mode = QImode;
8311 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8312 if (mode == GET_MODE (reg))
8313 addr_type = build_pointer_type (piece_type);
8314 else
8315 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8316 true);
8317 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8318 true);
8320 if (SSE_REGNO_P (REGNO (reg)))
8322 src_addr = sse_addr;
8323 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8325 else
8327 src_addr = int_addr;
8328 src_offset = REGNO (reg) * 8;
8330 src_addr = fold_convert (addr_type, src_addr);
8331 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
8332 size_int (src_offset));
8334 dest_addr = fold_convert (daddr_type, addr);
8335 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
8336 size_int (prev_size));
8337 if (cur_size == GET_MODE_SIZE (mode))
8339 src = build_va_arg_indirect_ref (src_addr);
8340 dest = build_va_arg_indirect_ref (dest_addr);
8342 gimplify_assign (dest, src, pre_p);
8344 else
8346 tree copy
8347 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
8348 3, dest_addr, src_addr,
8349 size_int (cur_size));
8350 gimplify_and_add (copy, pre_p);
8352 prev_size += cur_size;
8356 if (needed_intregs)
8358 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8359 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8360 gimplify_assign (gpr, t, pre_p);
8363 if (needed_sseregs)
8365 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8366 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8367 gimplify_assign (fpr, t, pre_p);
8370 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8372 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8375 /* ... otherwise out of the overflow area. */
8377 /* When we align a parameter on the stack for the caller, if its
8378 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8379 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee
8380 here with the caller. */
8381 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8382 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8383 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8385 /* Care for on-stack alignment if needed. */
8386 if (arg_boundary <= 64 || size == 0)
8387 t = ovf;
8388 else
8390 HOST_WIDE_INT align = arg_boundary / 8;
8391 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
8392 size_int (align - 1));
8393 t = fold_convert (sizetype, t);
8394 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8395 size_int (-align));
8396 t = fold_convert (TREE_TYPE (ovf), t);
8399 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8400 gimplify_assign (addr, t, pre_p);
8402 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
8403 size_int (rsize * UNITS_PER_WORD));
8404 gimplify_assign (unshare_expr (ovf), t, pre_p);
8406 if (container)
8407 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8409 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8410 addr = fold_convert (ptrtype, addr);
8412 if (indirect_p)
8413 addr = build_va_arg_indirect_ref (addr);
8414 return build_va_arg_indirect_ref (addr);
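/* Illustrative sketch of the stack-slot alignment performed above when
   an argument needs more than word alignment: round ADDR up to
   BOUNDARY, a power of two given in bytes.  The names are
   hypothetical.  */

static unsigned HOST_WIDE_INT
example_align_up (unsigned HOST_WIDE_INT addr, unsigned HOST_WIDE_INT boundary)
{
  return (addr + boundary - 1) & -boundary;
}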
8417 /* Return true if OPNUM's MEM should be matched
8418 in movabs* patterns. */
8420 bool
8421 ix86_check_movabs (rtx insn, int opnum)
8423 rtx set, mem;
8425 set = PATTERN (insn);
8426 if (GET_CODE (set) == PARALLEL)
8427 set = XVECEXP (set, 0, 0);
8428 gcc_assert (GET_CODE (set) == SET);
8429 mem = XEXP (set, opnum);
8430 while (GET_CODE (mem) == SUBREG)
8431 mem = SUBREG_REG (mem);
8432 gcc_assert (MEM_P (mem));
8433 return volatile_ok || !MEM_VOLATILE_P (mem);
8436 /* Initialize the table of extra 80387 mathematical constants. */
8438 static void
8439 init_ext_80387_constants (void)
8441 static const char * cst[5] =
8443 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8444 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8445 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8446 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8447 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8449 int i;
8451 for (i = 0; i < 5; i++)
8453 real_from_string (&ext_80387_constants_table[i], cst[i]);
8454 /* Ensure each constant is rounded to XFmode precision. */
8455 real_convert (&ext_80387_constants_table[i],
8456 XFmode, &ext_80387_constants_table[i]);
8459 ext_80387_constants_init = 1;
8462 /* Return non-zero if the constant is something that
8463 can be loaded with a special instruction. */
8466 standard_80387_constant_p (rtx x)
8468 enum machine_mode mode = GET_MODE (x);
8470 REAL_VALUE_TYPE r;
8472 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8473 return -1;
8475 if (x == CONST0_RTX (mode))
8476 return 1;
8477 if (x == CONST1_RTX (mode))
8478 return 2;
8480 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8482 /* For XFmode constants, try to find a special 80387 instruction when
8483 optimizing for size or on those CPUs that benefit from them. */
8484 if (mode == XFmode
8485 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8487 int i;
8489 if (! ext_80387_constants_init)
8490 init_ext_80387_constants ();
8492 for (i = 0; i < 5; i++)
8493 if (real_identical (&r, &ext_80387_constants_table[i]))
8494 return i + 3;
8497 /* A load of the constant -0.0 or -1.0 will be split into an
8498 fldz;fchs or fld1;fchs sequence. */
8499 if (real_isnegzero (&r))
8500 return 8;
8501 if (real_identical (&r, &dconstm1))
8502 return 9;
8504 return 0;
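/* Summary (descriptive only) of the codes returned above:
   0 means no special instruction, 1 is fldz, 2 is fld1, 3 .. 7 index
   the fldlg2/fldln2/fldl2e/fldl2t/fldpi table, and 8 / 9 stand for
   -0.0 / -1.0, which are later split into fldz;fchs / fld1;fchs.  */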
8507 /* Return the opcode of the special instruction to be used to load
8508 the constant X. */
8510 const char *
8511 standard_80387_constant_opcode (rtx x)
8513 switch (standard_80387_constant_p (x))
8515 case 1:
8516 return "fldz";
8517 case 2:
8518 return "fld1";
8519 case 3:
8520 return "fldlg2";
8521 case 4:
8522 return "fldln2";
8523 case 5:
8524 return "fldl2e";
8525 case 6:
8526 return "fldl2t";
8527 case 7:
8528 return "fldpi";
8529 case 8:
8530 case 9:
8531 return "#";
8532 default:
8533 gcc_unreachable ();
8537 /* Return the CONST_DOUBLE representing the 80387 constant that is
8538 loaded by the specified special instruction. The argument IDX
8539 matches the return value from standard_80387_constant_p. */
8542 standard_80387_constant_rtx (int idx)
8544 int i;
8546 if (! ext_80387_constants_init)
8547 init_ext_80387_constants ();
8549 switch (idx)
8551 case 3:
8552 case 4:
8553 case 5:
8554 case 6:
8555 case 7:
8556 i = idx - 3;
8557 break;
8559 default:
8560 gcc_unreachable ();
8563 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8564 XFmode);
8567 /* Return 1 if X is all 0s and 2 if X is all 1s
8568 in a supported SSE vector mode. */
8571 standard_sse_constant_p (rtx x)
8573 enum machine_mode mode = GET_MODE (x);
8575 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8576 return 1;
8577 if (vector_all_ones_operand (x, mode))
8578 switch (mode)
8580 case V16QImode:
8581 case V8HImode:
8582 case V4SImode:
8583 case V2DImode:
8584 if (TARGET_SSE2)
8585 return 2;
8586 default:
8587 break;
8590 return 0;
8593 /* Return the opcode of the special instruction to be used to load
8594 the constant X. */
8596 const char *
8597 standard_sse_constant_opcode (rtx insn, rtx x)
8599 switch (standard_sse_constant_p (x))
8601 case 1:
8602 switch (get_attr_mode (insn))
8604 case MODE_V4SF:
8605 return "%vxorps\t%0, %d0";
8606 case MODE_V2DF:
8607 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8608 return "%vxorps\t%0, %d0";
8609 else
8610 return "%vxorpd\t%0, %d0";
8611 case MODE_TI:
8612 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8613 return "%vxorps\t%0, %d0";
8614 else
8615 return "%vpxor\t%0, %d0";
8616 case MODE_V8SF:
8617 return "vxorps\t%x0, %x0, %x0";
8618 case MODE_V4DF:
8619 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8620 return "vxorps\t%x0, %x0, %x0";
8621 else
8622 return "vxorpd\t%x0, %x0, %x0";
8623 case MODE_OI:
8624 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8625 return "vxorps\t%x0, %x0, %x0";
8626 else
8627 return "vpxor\t%x0, %x0, %x0";
8628 default:
8629 break;
8631 case 2:
8632 return "%vpcmpeqd\t%0, %d0";
8633 default:
8634 break;
8636 gcc_unreachable ();
8639 /* Returns true if OP contains a symbol reference */
8641 bool
8642 symbolic_reference_mentioned_p (rtx op)
8644 const char *fmt;
8645 int i;
8647 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8648 return true;
8650 fmt = GET_RTX_FORMAT (GET_CODE (op));
8651 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8653 if (fmt[i] == 'E')
8655 int j;
8657 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8658 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8659 return true;
8662 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8663 return true;
8666 return false;
8669 /* Return true if it is appropriate to emit `ret' instructions in the
8670 body of a function. Do this only if the epilogue is simple, needing a
8671 couple of insns. Prior to reloading, we can't tell how many registers
8672 must be saved, so return false then. Return false if there is no frame
8673 marker to de-allocate. */
8675 bool
8676 ix86_can_use_return_insn_p (void)
8678 struct ix86_frame frame;
8680 if (! reload_completed || frame_pointer_needed)
8681 return 0;
8683 /* Don't allow more than 32k pop, since that's all we can do
8684 with one instruction. */
8685 if (crtl->args.pops_args && crtl->args.size >= 32768)
8686 return 0;
8688 ix86_compute_frame_layout (&frame);
8689 return (frame.stack_pointer_offset == UNITS_PER_WORD
8690 && (frame.nregs + frame.nsseregs) == 0);
8693 /* Value should be nonzero if functions must have frame pointers.
8694 Zero means the frame pointer need not be set up (and parms may
8695 be accessed via the stack pointer) in functions that seem suitable. */
8697 static bool
8698 ix86_frame_pointer_required (void)
8700 /* If we accessed previous frames, then the generated code expects
8701 to be able to access the saved ebp value in our frame. */
8702 if (cfun->machine->accesses_prev_frame)
8703 return true;
8705 /* Several x86 OSes need a frame pointer for other reasons,
8706 usually pertaining to setjmp. */
8707 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8708 return true;
8710 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8711 turns off the frame pointer by default. Turn it back on now if
8712 we've not got a leaf function. */
8713 if (TARGET_OMIT_LEAF_FRAME_POINTER
8714 && (!current_function_is_leaf
8715 || ix86_current_function_calls_tls_descriptor))
8716 return true;
8718 if (crtl->profile && !flag_fentry)
8719 return true;
8721 return false;
8724 /* Record that the current function accesses previous call frames. */
8726 void
8727 ix86_setup_frame_addresses (void)
8729 cfun->machine->accesses_prev_frame = 1;
8732 #ifndef USE_HIDDEN_LINKONCE
8733 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
8734 # define USE_HIDDEN_LINKONCE 1
8735 # else
8736 # define USE_HIDDEN_LINKONCE 0
8737 # endif
8738 #endif
8740 static int pic_labels_used;
8742 /* Fills in the label name that should be used for a pc thunk for
8743 the given register. */
8745 static void
8746 get_pc_thunk_name (char name[32], unsigned int regno)
8748 gcc_assert (!TARGET_64BIT);
8750 if (USE_HIDDEN_LINKONCE)
8751 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8752 else
8753 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8757 /* This function generates code for -fpic that loads %ebx with
8758 the return address of the caller and then returns. */
8760 static void
8761 ix86_code_end (void)
8763 rtx xops[2];
8764 int regno;
8766 #ifdef TARGET_SOLARIS
8767 solaris_code_end ();
8768 #endif
8770 for (regno = AX_REG; regno <= SP_REG; regno++)
8772 char name[32];
8773 tree decl;
8775 if (!(pic_labels_used & (1 << regno)))
8776 continue;
8778 get_pc_thunk_name (name, regno);
8780 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8781 get_identifier (name),
8782 build_function_type_list (void_type_node, NULL_TREE));
8783 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8784 NULL_TREE, void_type_node);
8785 TREE_PUBLIC (decl) = 1;
8786 TREE_STATIC (decl) = 1;
8788 #if TARGET_MACHO
8789 if (TARGET_MACHO)
8791 switch_to_section (darwin_sections[text_coal_section]);
8792 fputs ("\t.weak_definition\t", asm_out_file);
8793 assemble_name (asm_out_file, name);
8794 fputs ("\n\t.private_extern\t", asm_out_file);
8795 assemble_name (asm_out_file, name);
8796 putc ('\n', asm_out_file);
8797 ASM_OUTPUT_LABEL (asm_out_file, name);
8798 DECL_WEAK (decl) = 1;
8800 else
8801 #endif
8802 if (USE_HIDDEN_LINKONCE)
8804 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8806 targetm.asm_out.unique_section (decl, 0);
8807 switch_to_section (get_named_section (decl, NULL, 0));
8809 targetm.asm_out.globalize_label (asm_out_file, name);
8810 fputs ("\t.hidden\t", asm_out_file);
8811 assemble_name (asm_out_file, name);
8812 putc ('\n', asm_out_file);
8813 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8815 else
8817 switch_to_section (text_section);
8818 ASM_OUTPUT_LABEL (asm_out_file, name);
8821 DECL_INITIAL (decl) = make_node (BLOCK);
8822 current_function_decl = decl;
8823 init_function_start (decl);
8824 first_function_block_is_cold = false;
8825 /* Make sure unwind info is emitted for the thunk if needed. */
8826 final_start_function (emit_barrier (), asm_out_file, 1);
8828 /* Pad stack IP move with 4 instructions (two NOPs count
8829 as one instruction). */
8830 if (TARGET_PAD_SHORT_FUNCTION)
8832 int i = 8;
8834 while (i--)
8835 fputs ("\tnop\n", asm_out_file);
8838 xops[0] = gen_rtx_REG (Pmode, regno);
8839 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8840 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8841 fputs ("\tret\n", asm_out_file);
8842 final_end_function ();
8843 init_insn_lengths ();
8844 free_after_compilation (cfun);
8845 set_cfun (NULL);
8846 current_function_decl = NULL;
8849 if (flag_split_stack)
8850 file_end_indicate_split_stack ();
8853 /* Emit code for the SET_GOT patterns. */
8855 const char *
8856 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8858 rtx xops[3];
8860 xops[0] = dest;
8862 if (TARGET_VXWORKS_RTP && flag_pic)
8864 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8865 xops[2] = gen_rtx_MEM (Pmode,
8866 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8867 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8869 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8870 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8871 an unadorned address. */
8872 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8873 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8874 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8875 return "";
8878 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8880 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8882 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8884 if (!flag_pic)
8885 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8886 else
8888 output_asm_insn ("call\t%a2", xops);
8889 #ifdef DWARF2_UNWIND_INFO
8890 /* The call to the next label acts as a push. */
8891 if (dwarf2out_do_frame ())
8893 rtx insn;
8894 start_sequence ();
8895 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8896 gen_rtx_PLUS (Pmode,
8897 stack_pointer_rtx,
8898 GEN_INT (-4))));
8899 RTX_FRAME_RELATED_P (insn) = 1;
8900 dwarf2out_frame_debug (insn, true);
8901 end_sequence ();
8903 #endif
8906 #if TARGET_MACHO
8907 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8908 is what will be referenced by the Mach-O PIC subsystem. */
8909 if (!label)
8910 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8911 #endif
8913 targetm.asm_out.internal_label (asm_out_file, "L",
8914 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8916 if (flag_pic)
8918 output_asm_insn ("pop%z0\t%0", xops);
8919 #ifdef DWARF2_UNWIND_INFO
8920 /* The pop is a pop and clobbers dest, but doesn't restore it
8921 for unwind info purposes. */
8922 if (dwarf2out_do_frame ())
8924 rtx insn;
8925 start_sequence ();
8926 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8927 dwarf2out_frame_debug (insn, true);
8928 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8929 gen_rtx_PLUS (Pmode,
8930 stack_pointer_rtx,
8931 GEN_INT (4))));
8932 RTX_FRAME_RELATED_P (insn) = 1;
8933 dwarf2out_frame_debug (insn, true);
8934 end_sequence ();
8936 #endif
8939 else
8941 char name[32];
8942 get_pc_thunk_name (name, REGNO (dest));
8943 pic_labels_used |= 1 << REGNO (dest);
8945 #ifdef DWARF2_UNWIND_INFO
8946 /* Ensure all queued register saves are flushed before the
8947 call. */
8948 if (dwarf2out_do_frame ())
8949 dwarf2out_flush_queued_reg_saves ();
8950 #endif
8951 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8952 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8953 output_asm_insn ("call\t%X2", xops);
8954 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8955 is what will be referenced by the Mach-O PIC subsystem. */
8956 #if TARGET_MACHO
8957 if (!label)
8958 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8959 else
8960 targetm.asm_out.internal_label (asm_out_file, "L",
8961 CODE_LABEL_NUMBER (label));
8962 #endif
8965 if (TARGET_MACHO)
8966 return "";
8968 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8969 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8970 else
8971 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
8973 return "";
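/* For reference (descriptive only): without the pc thunk, the -fpic
   sequence emitted above amounts to roughly

	call	1f
   1:	popl	%reg
	addl	$_GLOBAL_OFFSET_TABLE_+[.-1b], %reg

   while the thunk variant replaces the call/pop pair with a call to
   __i686.get_pc_thunk.reg.  */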
8976 /* Generate a "push" pattern for input ARG. */
8978 static rtx
8979 gen_push (rtx arg)
8981 struct machine_function *m = cfun->machine;
8983 if (m->fs.cfa_reg == stack_pointer_rtx)
8984 m->fs.cfa_offset += UNITS_PER_WORD;
8985 m->fs.sp_offset += UNITS_PER_WORD;
8987 return gen_rtx_SET (VOIDmode,
8988 gen_rtx_MEM (Pmode,
8989 gen_rtx_PRE_DEC (Pmode,
8990 stack_pointer_rtx)),
8991 arg);
8994 /* Generate a "pop" pattern for input ARG. */
8996 static rtx
8997 gen_pop (rtx arg)
8999 return gen_rtx_SET (VOIDmode,
9000 arg,
9001 gen_rtx_MEM (Pmode,
9002 gen_rtx_POST_INC (Pmode,
9003 stack_pointer_rtx)));
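/* Note (descriptive only): gen_push above builds
   (set (mem (pre_dec sp)) arg) and gen_pop builds
   (set arg (mem (post_inc sp))), i.e. the RTL forms of the x86 push
   and pop instructions; gen_push also tracks the CFA and stack
   pointer offsets in cfun->machine->fs.  */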
9006 /* Return >= 0 if there is an unused call-clobbered register available
9007 for the entire function. */
9009 static unsigned int
9010 ix86_select_alt_pic_regnum (void)
9012 if (current_function_is_leaf
9013 && !crtl->profile
9014 && !ix86_current_function_calls_tls_descriptor)
9016 int i, drap;
9017 /* Can't use the same register for both PIC and DRAP. */
9018 if (crtl->drap_reg)
9019 drap = REGNO (crtl->drap_reg);
9020 else
9021 drap = -1;
9022 for (i = 2; i >= 0; --i)
9023 if (i != drap && !df_regs_ever_live_p (i))
9024 return i;
9027 return INVALID_REGNUM;
9030 /* Return TRUE if we need to save REGNO. */
9032 static bool
9033 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9035 if (pic_offset_table_rtx
9036 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9037 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9038 || crtl->profile
9039 || crtl->calls_eh_return
9040 || crtl->uses_const_pool))
9041 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9043 if (crtl->calls_eh_return && maybe_eh_return)
9045 unsigned i;
9046 for (i = 0; ; i++)
9048 unsigned test = EH_RETURN_DATA_REGNO (i);
9049 if (test == INVALID_REGNUM)
9050 break;
9051 if (test == regno)
9052 return true;
9056 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9057 return true;
9059 return (df_regs_ever_live_p (regno)
9060 && !call_used_regs[regno]
9061 && !fixed_regs[regno]
9062 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9065 /* Return the number of saved general purpose registers. */
9067 static int
9068 ix86_nsaved_regs (void)
9070 int nregs = 0;
9071 int regno;
9073 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9074 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9075 nregs ++;
9076 return nregs;
9079 /* Return the number of saved SSE registers. */
9081 static int
9082 ix86_nsaved_sseregs (void)
9084 int nregs = 0;
9085 int regno;
9087 if (!TARGET_64BIT_MS_ABI)
9088 return 0;
9089 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9090 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9091 nregs ++;
9092 return nregs;
9095 /* Given FROM and TO register numbers, say whether this elimination is
9096 allowed. If stack alignment is needed, we can only replace argument
9097 pointer with hard frame pointer, or replace frame pointer with stack
9098 pointer. Otherwise, frame pointer elimination is automatically
9099 handled and all other eliminations are valid. */
9101 static bool
9102 ix86_can_eliminate (const int from, const int to)
9104 if (stack_realign_fp)
9105 return ((from == ARG_POINTER_REGNUM
9106 && to == HARD_FRAME_POINTER_REGNUM)
9107 || (from == FRAME_POINTER_REGNUM
9108 && to == STACK_POINTER_REGNUM));
9109 else
9110 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9113 /* Return the offset between two registers, one to be eliminated, and the other
9114 its replacement, at the start of a routine. */
9116 HOST_WIDE_INT
9117 ix86_initial_elimination_offset (int from, int to)
9119 struct ix86_frame frame;
9120 ix86_compute_frame_layout (&frame);
9122 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9123 return frame.hard_frame_pointer_offset;
9124 else if (from == FRAME_POINTER_REGNUM
9125 && to == HARD_FRAME_POINTER_REGNUM)
9126 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9127 else
9129 gcc_assert (to == STACK_POINTER_REGNUM);
9131 if (from == ARG_POINTER_REGNUM)
9132 return frame.stack_pointer_offset;
9134 gcc_assert (from == FRAME_POINTER_REGNUM);
9135 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9139 /* In a dynamically-aligned function, we can't know the offset from
9140 stack pointer to frame pointer, so we must ensure that setjmp
9141 eliminates fp against the hard fp (%ebp) rather than trying to
9142 index from %esp up to the top of the frame across a gap that is
9143 of unknown (at compile-time) size. */
9144 static rtx
9145 ix86_builtin_setjmp_frame_value (void)
9147 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9150 /* On the x86, -fsplit-stack and -fstack-protector both use the same
9151 field in the TCB, so they cannot be used together. */
9153 static bool
9154 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED,
9155 struct gcc_options *opts ATTRIBUTE_UNUSED)
9157 bool ret = true;
9159 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
9160 if (report)
9161 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
9162 ret = false;
9163 #else
9164 if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
9166 if (report)
9167 error ("%<-fsplit-stack%> requires "
9168 "assembler support for CFI directives");
9169 ret = false;
9171 #endif
9173 return ret;
9176 /* When using -fsplit-stack, the allocation routines set a field in
9177 the TCB to the bottom of the stack plus this much space, measured
9178 in bytes. */
9180 #define SPLIT_STACK_AVAILABLE 256
9182 /* Fill structure ix86_frame describing the frame of the currently compiled function. */
9184 static void
9185 ix86_compute_frame_layout (struct ix86_frame *frame)
9187 unsigned int stack_alignment_needed;
9188 HOST_WIDE_INT offset;
9189 unsigned int preferred_alignment;
9190 HOST_WIDE_INT size = get_frame_size ();
9191 HOST_WIDE_INT to_allocate;
9193 frame->nregs = ix86_nsaved_regs ();
9194 frame->nsseregs = ix86_nsaved_sseregs ();
9196 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9197 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9199 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
9200 for function prologues and leaf functions. */
9201 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
9202 && (!current_function_is_leaf || cfun->calls_alloca != 0
9203 || ix86_current_function_calls_tls_descriptor))
9205 preferred_alignment = 16;
9206 stack_alignment_needed = 16;
9207 crtl->preferred_stack_boundary = 128;
9208 crtl->stack_alignment_needed = 128;
9211 gcc_assert (!size || stack_alignment_needed);
9212 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9213 gcc_assert (preferred_alignment <= stack_alignment_needed);
9215 /* For SEH we have to limit the amount of code movement into the prologue.
9216 At present we do this via a BLOCKAGE, at which point there's very little
9217 scheduling that can be done, which means that there's very little point
9218 in doing anything except PUSHs. */
9219 if (TARGET_SEH)
9220 cfun->machine->use_fast_prologue_epilogue = false;
9222 /* During reload iterations the number of registers saved can change.
9223 Recompute the value as needed. Do not recompute when the number of registers
9224 didn't change, as reload makes multiple calls to this function and does not
9225 expect the decision to change within a single iteration. */
9226 else if (!optimize_function_for_size_p (cfun)
9227 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9229 int count = frame->nregs;
9230 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9232 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9234 /* The fast prologue uses moves instead of pushes to save registers. This
9235 is significantly longer, but also executes faster as modern hardware
9236 can execute the moves in parallel, but can't do that for push/pop.
9238 Be careful about choosing which prologue to emit: when the function takes
9239 many instructions to execute we may use the slow version, as well as when
9240 the function is known to be outside a hot spot (known only with profile
9241 feedback). Weight the size of the function by the number of registers
9242 to save, as it is cheap to use one or two push instructions but very
9243 slow to use many of them. */
9244 if (count)
9245 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9246 if (node->frequency < NODE_FREQUENCY_NORMAL
9247 || (flag_branch_probabilities
9248 && node->frequency < NODE_FREQUENCY_HOT))
9249 cfun->machine->use_fast_prologue_epilogue = false;
9250 else
9251 cfun->machine->use_fast_prologue_epilogue
9252 = !expensive_function_p (count);
9254 if (TARGET_PROLOGUE_USING_MOVE
9255 && cfun->machine->use_fast_prologue_epilogue)
9256 frame->save_regs_using_mov = true;
9257 else
9258 frame->save_regs_using_mov = false;
9260 /* If static stack checking is enabled and done with probes, the registers
9261 need to be saved before allocating the frame. */
9262 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9263 frame->save_regs_using_mov = false;
9265 /* Skip return address. */
9266 offset = UNITS_PER_WORD;
9268 /* Skip pushed static chain. */
9269 if (ix86_static_chain_on_stack)
9270 offset += UNITS_PER_WORD;
9272 /* Skip saved base pointer. */
9273 if (frame_pointer_needed)
9274 offset += UNITS_PER_WORD;
9275 frame->hfp_save_offset = offset;
9277 /* The traditional frame pointer location is at the top of the frame. */
9278 frame->hard_frame_pointer_offset = offset;
9280 /* Register save area */
9281 offset += frame->nregs * UNITS_PER_WORD;
9282 frame->reg_save_offset = offset;
9284 /* Align and set SSE register save area. */
9285 if (frame->nsseregs)
9287 /* The only ABI that has saved SSE registers (Win64) also has a
9288 16-byte aligned default stack, and thus we don't need to be
9289 within the re-aligned local stack frame to save them. */
9290 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9291 offset = (offset + 16 - 1) & -16;
9292 offset += frame->nsseregs * 16;
9294 frame->sse_reg_save_offset = offset;
9296 /* The re-aligned stack starts here. Values before this point are not
9297 directly comparable with values below this point. In order to make
9298 sure that no value happens to be the same before and after, force
9299 the alignment computation below to add a non-zero value. */
9300 if (stack_realign_fp)
9301 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9303 /* Va-arg area */
9304 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9305 offset += frame->va_arg_size;
9307 /* Align start of frame for local function. */
9308 if (stack_realign_fp
9309 || offset != frame->sse_reg_save_offset
9310 || size != 0
9311 || !current_function_is_leaf
9312 || cfun->calls_alloca
9313 || ix86_current_function_calls_tls_descriptor)
9314 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9316 /* Frame pointer points here. */
9317 frame->frame_pointer_offset = offset;
9319 offset += size;
9321 /* Add the outgoing arguments area. It can be skipped if we eliminated
9322 all the function calls as dead code.
9323 Skipping is however impossible when the function calls alloca. The alloca
9324 expander assumes that the last crtl->outgoing_args_size bytes
9325 of the stack frame are unused. */
9326 if (ACCUMULATE_OUTGOING_ARGS
9327 && (!current_function_is_leaf || cfun->calls_alloca
9328 || ix86_current_function_calls_tls_descriptor))
9330 offset += crtl->outgoing_args_size;
9331 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9333 else
9334 frame->outgoing_arguments_size = 0;
9336 /* Align stack boundary. Only needed if we're calling another function
9337 or using alloca. */
9338 if (!current_function_is_leaf || cfun->calls_alloca
9339 || ix86_current_function_calls_tls_descriptor)
9340 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9342 /* We've reached end of stack frame. */
9343 frame->stack_pointer_offset = offset;
9345 /* Size prologue needs to allocate. */
9346 to_allocate = offset - frame->sse_reg_save_offset;
9348 if ((!to_allocate && frame->nregs <= 1)
9349 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9350 frame->save_regs_using_mov = false;
9352 if (ix86_using_red_zone ()
9353 && current_function_sp_is_unchanging
9354 && current_function_is_leaf
9355 && !ix86_current_function_calls_tls_descriptor)
9357 frame->red_zone_size = to_allocate;
9358 if (frame->save_regs_using_mov)
9359 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9360 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9361 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9363 else
9364 frame->red_zone_size = 0;
9365 frame->stack_pointer_offset -= frame->red_zone_size;
9367 /* The SEH frame pointer location is near the bottom of the frame.
9368 This is enforced by the fact that the difference between the
9369 stack pointer and the frame pointer is limited to 240 bytes in
9370 the unwind data structure. */
9371 if (TARGET_SEH)
9373 HOST_WIDE_INT diff;
9375 /* If we can leave the frame pointer where it is, do so. */
9376 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9377 if (diff > 240 || (diff & 15) != 0)
9379 /* Ideally we'd determine what portion of the local stack frame
9380 (within the constraint of the lowest 240) is most heavily used.
9381 But without that complication, simply bias the frame pointer
9382 by 128 bytes so as to maximize the amount of the local stack
9383 frame that is addressable with 8-bit offsets. */
9384 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
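/* Illustrative sketch of the layout just computed (an informal aid,
   not normative; optional pieces in brackets, offsets measured from
   the CFA and growing toward lower addresses):

       return address                       <- UNITS_PER_WORD
       [pushed static chain]
       [saved frame pointer]                <- hfp_save_offset,
                                               hard_frame_pointer_offset
       integer register save area           <- reg_save_offset
       [16-byte padding, SSE register save] <- sse_reg_save_offset
       [realignment padding]
       [va_arg register save area]
       local variables                      <- frame_pointer_offset
       [outgoing arguments]
                                            <- stack_pointer_offset
                                               (less whatever fits in
                                               the red zone)

   Under SEH the hard frame pointer may instead be biased to within
   240 bytes of stack_pointer_offset, as handled just above.  */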
9389 /* This is semi-inlined memory_address_length, but simplified
9390 since we know that we're always dealing with reg+offset, and
9391 to avoid having to create and discard all that rtl. */
9393 static inline int
9394 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9396 int len = 4;
9398 if (offset == 0)
9400 /* EBP and R13 cannot be encoded without an offset. */
9401 len = (regno == BP_REG || regno == R13_REG);
9403 else if (IN_RANGE (offset, -128, 127))
9404 len = 1;
9406 /* ESP and R12 must be encoded with a SIB byte. */
9407 if (regno == SP_REG || regno == R12_REG)
9408 len++;
9410 return len;
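/* For illustration only (derived from the logic above, not from the
   original source): the value counts displacement plus SIB bytes, not
   the whole instruction.  E.g. (%eax, 0) -> 0; (%ebp, 0) -> 1 because
   EBP needs a disp8; (%esp, 0) -> 1 because ESP needs a SIB byte;
   (%ebp, 8) -> 1; (%esp, 8) -> 2; and (%ebp, 300) -> 4 since the
   offset no longer fits in a signed byte.  */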
9413 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9414 The valid base registers are taken from CFUN->MACHINE->FS. */
9416 static rtx
9417 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9419 const struct machine_function *m = cfun->machine;
9420 rtx base_reg = NULL;
9421 HOST_WIDE_INT base_offset = 0;
9423 if (m->use_fast_prologue_epilogue)
9425 /* Choose the base register most likely to allow the most scheduling
9426 opportunities. Generally FP is valid throughout the function,
9427 while DRAP must be reloaded within the epilogue. But choose either
9428 over the SP due to increased encoding size. */
9430 if (m->fs.fp_valid)
9432 base_reg = hard_frame_pointer_rtx;
9433 base_offset = m->fs.fp_offset - cfa_offset;
9435 else if (m->fs.drap_valid)
9437 base_reg = crtl->drap_reg;
9438 base_offset = 0 - cfa_offset;
9440 else if (m->fs.sp_valid)
9442 base_reg = stack_pointer_rtx;
9443 base_offset = m->fs.sp_offset - cfa_offset;
9446 else
9448 HOST_WIDE_INT toffset;
9449 int len = 16, tlen;
9451 /* Choose the base register with the smallest address encoding.
9452 With a tie, choose FP > DRAP > SP. */
9453 if (m->fs.sp_valid)
9455 base_reg = stack_pointer_rtx;
9456 base_offset = m->fs.sp_offset - cfa_offset;
9457 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9459 if (m->fs.drap_valid)
9461 toffset = 0 - cfa_offset;
9462 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9463 if (tlen <= len)
9465 base_reg = crtl->drap_reg;
9466 base_offset = toffset;
9467 len = tlen;
9470 if (m->fs.fp_valid)
9472 toffset = m->fs.fp_offset - cfa_offset;
9473 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9474 if (tlen <= len)
9476 base_reg = hard_frame_pointer_rtx;
9477 base_offset = toffset;
9478 len = tlen;
9482 gcc_assert (base_reg != NULL);
9484 return plus_constant (base_reg, base_offset);
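/* A hedged example of the size-based choice above: if the stack
   pointer is 200 bytes away from CFA_OFFSET while the frame pointer
   is only 8 bytes away, SP needs a 32-bit displacement plus a SIB
   byte (choose_baseaddr_len == 5) whereas FP needs a single disp8
   byte, so FP is selected.  Since FP is tested last with "<=", it
   also wins any ties, giving the FP > DRAP > SP preference noted
   above.  */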
9487 /* Emit code to save registers in the prologue. */
9489 static void
9490 ix86_emit_save_regs (void)
9492 unsigned int regno;
9493 rtx insn;
9495 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9496 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9498 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9499 RTX_FRAME_RELATED_P (insn) = 1;
9503 /* Emit a single register save at CFA - CFA_OFFSET. */
9505 static void
9506 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9507 HOST_WIDE_INT cfa_offset)
9509 struct machine_function *m = cfun->machine;
9510 rtx reg = gen_rtx_REG (mode, regno);
9511 rtx mem, addr, base, insn;
9513 addr = choose_baseaddr (cfa_offset);
9514 mem = gen_frame_mem (mode, addr);
9516 /* For SSE saves, we need to indicate the 128-bit alignment. */
9517 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9519 insn = emit_move_insn (mem, reg);
9520 RTX_FRAME_RELATED_P (insn) = 1;
9522 base = addr;
9523 if (GET_CODE (base) == PLUS)
9524 base = XEXP (base, 0);
9525 gcc_checking_assert (REG_P (base));
9527 /* When saving registers into a re-aligned local stack frame, avoid
9528 any tricky guessing by dwarf2out. */
9529 if (m->fs.realigned)
9531 gcc_checking_assert (stack_realign_drap);
9533 if (regno == REGNO (crtl->drap_reg))
9535 /* A bit of a hack. We force the DRAP register to be saved in
9536 the re-aligned stack frame, which provides us with a copy
9537 of the CFA that will last past the prologue. Install it. */
9538 gcc_checking_assert (cfun->machine->fs.fp_valid);
9539 addr = plus_constant (hard_frame_pointer_rtx,
9540 cfun->machine->fs.fp_offset - cfa_offset);
9541 mem = gen_rtx_MEM (mode, addr);
9542 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9544 else
9546 /* The frame pointer is a stable reference within the
9547 aligned frame. Use it. */
9548 gcc_checking_assert (cfun->machine->fs.fp_valid);
9549 addr = plus_constant (hard_frame_pointer_rtx,
9550 cfun->machine->fs.fp_offset - cfa_offset);
9551 mem = gen_rtx_MEM (mode, addr);
9552 add_reg_note (insn, REG_CFA_EXPRESSION,
9553 gen_rtx_SET (VOIDmode, mem, reg));
9557 /* The memory may not be relative to the current CFA register,
9558 which means that we may need to generate a new pattern for
9559 use by the unwind info. */
9560 else if (base != m->fs.cfa_reg)
9562 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9563 mem = gen_rtx_MEM (mode, addr);
9564 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9568 /* Emit code to save registers using MOV insns.
9569 First register is stored at CFA - CFA_OFFSET. */
9570 static void
9571 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9573 unsigned int regno;
9575 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9576 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9578 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9579 cfa_offset -= UNITS_PER_WORD;
9583 /* Emit code to save SSE registers using MOV insns.
9584 First register is stored at CFA - CFA_OFFSET. */
9585 static void
9586 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9588 unsigned int regno;
9590 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9591 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9593 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9594 cfa_offset -= 16;
9598 static GTY(()) rtx queued_cfa_restores;
9600 /* Add a REG_CFA_RESTORE REG note to INSN or queue it until the next stack
9601 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9602 Don't add the note if the previously saved value will be left untouched
9603 within the stack red-zone till return, as unwinders can find the same value
9604 in the register and on the stack. */
9606 static void
9607 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9609 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9610 return;
9612 if (insn)
9614 add_reg_note (insn, REG_CFA_RESTORE, reg);
9615 RTX_FRAME_RELATED_P (insn) = 1;
9617 else
9618 queued_cfa_restores
9619 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9622 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9624 static void
9625 ix86_add_queued_cfa_restore_notes (rtx insn)
9627 rtx last;
9628 if (!queued_cfa_restores)
9629 return;
9630 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9632 XEXP (last, 1) = REG_NOTES (insn);
9633 REG_NOTES (insn) = queued_cfa_restores;
9634 queued_cfa_restores = NULL_RTX;
9635 RTX_FRAME_RELATED_P (insn) = 1;
9638 /* Expand prologue or epilogue stack adjustment.
9639 The pattern exists to put a dependency on all ebp-based memory accesses.
9640 STYLE should be negative if instructions should be marked as frame related,
9641 zero if the %r11 register is live and cannot be freely used, and positive
9642 otherwise. */
9644 static void
9645 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9646 int style, bool set_cfa)
9648 struct machine_function *m = cfun->machine;
9649 rtx insn;
9650 bool add_frame_related_expr = false;
9652 if (! TARGET_64BIT)
9653 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9654 else if (x86_64_immediate_operand (offset, DImode))
9655 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9656 else
9658 rtx tmp;
9659 /* r11 is used by indirect sibcall return as well, set before the
9660 epilogue and used after the epilogue. */
9661 if (style)
9662 tmp = gen_rtx_REG (DImode, R11_REG);
9663 else
9665 gcc_assert (src != hard_frame_pointer_rtx
9666 && dest != hard_frame_pointer_rtx);
9667 tmp = hard_frame_pointer_rtx;
9669 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9670 if (style < 0)
9671 add_frame_related_expr = true;
9673 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9676 insn = emit_insn (insn);
9677 if (style >= 0)
9678 ix86_add_queued_cfa_restore_notes (insn);
9680 if (set_cfa)
9682 rtx r;
9684 gcc_assert (m->fs.cfa_reg == src);
9685 m->fs.cfa_offset += INTVAL (offset);
9686 m->fs.cfa_reg = dest;
9688 r = gen_rtx_PLUS (Pmode, src, offset);
9689 r = gen_rtx_SET (VOIDmode, dest, r);
9690 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9691 RTX_FRAME_RELATED_P (insn) = 1;
9693 else if (style < 0)
9695 RTX_FRAME_RELATED_P (insn) = 1;
9696 if (add_frame_related_expr)
9698 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9699 r = gen_rtx_SET (VOIDmode, dest, r);
9700 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9704 if (dest == stack_pointer_rtx)
9706 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9707 bool valid = m->fs.sp_valid;
9709 if (src == hard_frame_pointer_rtx)
9711 valid = m->fs.fp_valid;
9712 ooffset = m->fs.fp_offset;
9714 else if (src == crtl->drap_reg)
9716 valid = m->fs.drap_valid;
9717 ooffset = 0;
9719 else
9721 /* Else there are two possibilities: SP itself, which we set
9722 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9723 taken care of by hand along the eh_return path. */
9724 gcc_checking_assert (src == stack_pointer_rtx
9725 || offset == const0_rtx);
9728 m->fs.sp_offset = ooffset - INTVAL (offset);
9729 m->fs.sp_valid = valid;
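/* Bookkeeping note (informal, not from the original source): because
   sp_offset measures the distance from the CFA down to the stack
   pointer, a prologue call such as
       pro_epilogue_adjust_stack (sp, sp, GEN_INT (-N), -1, ...)
   increases m->fs.sp_offset by N, and the matching epilogue
   adjustment by +N brings it back, keeping the tracked state in sync
   with the real stack pointer.  */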
9733 /* Find an available register to be used as the dynamic realign argument
9734 pointer register. Such a register will be written in the prologue and
9735 used at the beginning of the body, so it must not be
9736 1. a parameter passing register.
9737 2. the GOT pointer.
9738 We reuse the static-chain register if it is available. Otherwise, we
9739 use DI for i386 and R13 for x86-64. We chose R13 since it has a
9740 shorter encoding.
9742 Return: the regno of chosen register. */
9744 static unsigned int
9745 find_drap_reg (void)
9747 tree decl = cfun->decl;
9749 if (TARGET_64BIT)
9751 /* Use R13 for a nested function or a function that needs a static chain.
9752 Since a function with a tail call may use any caller-saved
9753 register in the epilogue, DRAP must not use a caller-saved
9754 register in such a case. */
9755 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9756 return R13_REG;
9758 return R10_REG;
9760 else
9762 /* Use DI for a nested function or a function that needs a static chain.
9763 Since a function with a tail call may use any caller-saved
9764 register in the epilogue, DRAP must not use a caller-saved
9765 register in such a case. */
9766 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9767 return DI_REG;
9769 /* Reuse static chain register if it isn't used for parameter
9770 passing. */
9771 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9773 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9774 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9775 return CX_REG;
9777 return DI_REG;
9781 /* Return minimum incoming stack alignment. */
9783 static unsigned int
9784 ix86_minimum_incoming_stack_boundary (bool sibcall)
9786 unsigned int incoming_stack_boundary;
9788 /* Prefer the one specified at command line. */
9789 if (ix86_user_incoming_stack_boundary)
9790 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9791 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9792 when -mstackrealign is used, this isn't a sibcall check, and the
9793 estimated stack alignment is 128 bits. */
9794 else if (!sibcall
9795 && !TARGET_64BIT
9796 && ix86_force_align_arg_pointer
9797 && crtl->stack_alignment_estimated == 128)
9798 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9799 else
9800 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9802 /* Incoming stack alignment can be changed on individual functions
9803 via force_align_arg_pointer attribute. We use the smallest
9804 incoming stack boundary. */
9805 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9806 && lookup_attribute (ix86_force_align_arg_pointer_string,
9807 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9808 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9810 /* The incoming stack frame has to be aligned at least at
9811 parm_stack_boundary. */
9812 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9813 incoming_stack_boundary = crtl->parm_stack_boundary;
9815 /* The stack at the entrance of main is aligned by the runtime. We use
9816 the smallest incoming stack boundary. */
9817 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9818 && DECL_NAME (current_function_decl)
9819 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9820 && DECL_FILE_SCOPE_P (current_function_decl))
9821 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9823 return incoming_stack_boundary;
9826 /* Update incoming stack boundary and estimated stack alignment. */
9828 static void
9829 ix86_update_stack_boundary (void)
9831 ix86_incoming_stack_boundary
9832 = ix86_minimum_incoming_stack_boundary (false);
9834 /* x86_64 varargs functions need 16-byte stack alignment for the register
9835 save area. */
9836 if (TARGET_64BIT
9837 && cfun->stdarg
9838 && crtl->stack_alignment_estimated < 128)
9839 crtl->stack_alignment_estimated = 128;
9842 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9843 needed or an rtx for DRAP otherwise. */
9845 static rtx
9846 ix86_get_drap_rtx (void)
9848 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9849 crtl->need_drap = true;
9851 if (stack_realign_drap)
9853 /* Assign DRAP to vDRAP and return vDRAP. */
9854 unsigned int regno = find_drap_reg ();
9855 rtx drap_vreg;
9856 rtx arg_ptr;
9857 rtx seq, insn;
9859 arg_ptr = gen_rtx_REG (Pmode, regno);
9860 crtl->drap_reg = arg_ptr;
9862 start_sequence ();
9863 drap_vreg = copy_to_reg (arg_ptr);
9864 seq = get_insns ();
9865 end_sequence ();
9867 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9868 if (!optimize)
9870 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9871 RTX_FRAME_RELATED_P (insn) = 1;
9873 return drap_vreg;
9875 else
9876 return NULL;
9879 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9881 static rtx
9882 ix86_internal_arg_pointer (void)
9884 return virtual_incoming_args_rtx;
9887 struct scratch_reg {
9888 rtx reg;
9889 bool saved;
9892 /* Return a short-lived scratch register for use on function entry.
9893 In 32-bit mode, it is valid only after the registers are saved
9894 in the prologue. This register must be released by means of
9895 release_scratch_register_on_entry once it is dead. */
9897 static void
9898 get_scratch_register_on_entry (struct scratch_reg *sr)
9900 int regno;
9902 sr->saved = false;
9904 if (TARGET_64BIT)
9906 /* We always use R11 in 64-bit mode. */
9907 regno = R11_REG;
9909 else
9911 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9912 bool fastcall_p
9913 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9914 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9915 int regparm = ix86_function_regparm (fntype, decl);
9916 int drap_regno
9917 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9919 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9920 for the static chain register. */
9921 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9922 && drap_regno != AX_REG)
9923 regno = AX_REG;
9924 else if (regparm < 2 && drap_regno != DX_REG)
9925 regno = DX_REG;
9926 /* ecx is the static chain register. */
9927 else if (regparm < 3 && !fastcall_p && !static_chain_p
9928 && drap_regno != CX_REG)
9929 regno = CX_REG;
9930 else if (ix86_save_reg (BX_REG, true))
9931 regno = BX_REG;
9932 /* esi is the static chain register. */
9933 else if (!(regparm == 3 && static_chain_p)
9934 && ix86_save_reg (SI_REG, true))
9935 regno = SI_REG;
9936 else if (ix86_save_reg (DI_REG, true))
9937 regno = DI_REG;
9938 else
9940 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9941 sr->saved = true;
9945 sr->reg = gen_rtx_REG (Pmode, regno);
9946 if (sr->saved)
9948 rtx insn = emit_insn (gen_push (sr->reg));
9949 RTX_FRAME_RELATED_P (insn) = 1;
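/* Summarizing the selection above (an informal restatement, not part
   of the original source): 64-bit code always gets %r11; 32-bit code
   prefers, in order, %eax, %edx and %ecx when they are free of
   argument, static-chain and DRAP duties, then falls back to a
   callee-saved register that the prologue saves anyway (%ebx, %esi,
   %edi), and only as a last resort pushes and pops a register around
   its use.  */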
9953 /* Release a scratch register obtained from the preceding function. */
9955 static void
9956 release_scratch_register_on_entry (struct scratch_reg *sr)
9958 if (sr->saved)
9960 rtx x, insn = emit_insn (gen_pop (sr->reg));
9962 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9963 RTX_FRAME_RELATED_P (insn) = 1;
9964 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9965 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9966 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9970 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9972 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9974 static void
9975 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9977 /* We skip the probe for the first interval + a small dope of 4 words and
9978 probe that many bytes past the specified size to maintain a protection
9979 area at the bottom of the stack. */
9980 const int dope = 4 * UNITS_PER_WORD;
9981 rtx size_rtx = GEN_INT (size), last;
9983 /* See if we have a constant small number of probes to generate. If so,
9984 that's the easy case. The run-time loop is made up of 11 insns in the
9985 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9986 for n # of intervals. */
9987 if (size <= 5 * PROBE_INTERVAL)
9989 HOST_WIDE_INT i, adjust;
9990 bool first_probe = true;
9992 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9993 values of N from 1 until it exceeds SIZE. If only one probe is
9994 needed, this will not generate any code. Then adjust and probe
9995 to PROBE_INTERVAL + SIZE. */
9996 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9998 if (first_probe)
10000 adjust = 2 * PROBE_INTERVAL + dope;
10001 first_probe = false;
10003 else
10004 adjust = PROBE_INTERVAL;
10006 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10007 plus_constant (stack_pointer_rtx, -adjust)));
10008 emit_stack_probe (stack_pointer_rtx);
10011 if (first_probe)
10012 adjust = size + PROBE_INTERVAL + dope;
10013 else
10014 adjust = size + PROBE_INTERVAL - i;
10016 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10017 plus_constant (stack_pointer_rtx, -adjust)));
10018 emit_stack_probe (stack_pointer_rtx);
10020 /* Adjust back to account for the additional first interval. */
10021 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10022 plus_constant (stack_pointer_rtx,
10023 PROBE_INTERVAL + dope)));
10026 /* Otherwise, do the same as above, but in a loop. Note that we must be
10027 extra careful with variables wrapping around because we might be at
10028 the very top (or the very bottom) of the address space and we have
10029 to be able to handle this case properly; in particular, we use an
10030 equality test for the loop condition. */
10031 else
10033 HOST_WIDE_INT rounded_size;
10034 struct scratch_reg sr;
10036 get_scratch_register_on_entry (&sr);
10039 /* Step 1: round SIZE to the previous multiple of the interval. */
10041 rounded_size = size & -PROBE_INTERVAL;
10044 /* Step 2: compute initial and final value of the loop counter. */
10046 /* SP = SP_0 + PROBE_INTERVAL. */
10047 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10048 plus_constant (stack_pointer_rtx,
10049 - (PROBE_INTERVAL + dope))));
10051 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10052 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10053 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10054 gen_rtx_PLUS (Pmode, sr.reg,
10055 stack_pointer_rtx)));
10058 /* Step 3: the loop
10060 while (SP != LAST_ADDR)
10062 SP = SP + PROBE_INTERVAL
10063 probe at SP
10066 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10067 values of N from 1 until it is equal to ROUNDED_SIZE. */
10069 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10072 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10073 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10075 if (size != rounded_size)
10077 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10078 plus_constant (stack_pointer_rtx,
10079 rounded_size - size)));
10080 emit_stack_probe (stack_pointer_rtx);
10083 /* Adjust back to account for the additional first interval. */
10084 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10085 plus_constant (stack_pointer_rtx,
10086 PROBE_INTERVAL + dope)));
10088 release_scratch_register_on_entry (&sr);
10091 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10093 /* Even if the stack pointer isn't the CFA register, we need to correctly
10094 describe the adjustments made to it, in particular differentiate the
10095 frame-related ones from the frame-unrelated ones. */
10096 if (size > 0)
10098 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10099 XVECEXP (expr, 0, 0)
10100 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10101 plus_constant (stack_pointer_rtx, -size));
10102 XVECEXP (expr, 0, 1)
10103 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10104 plus_constant (stack_pointer_rtx,
10105 PROBE_INTERVAL + dope + size));
10106 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10107 RTX_FRAME_RELATED_P (last) = 1;
10109 cfun->machine->fs.sp_offset += size;
10112 /* Make sure nothing is scheduled before we are done. */
10113 emit_insn (gen_blockage ());
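/* Worked example (illustrative only, assuming the usual 4096-byte
   PROBE_INTERVAL and 64-bit mode, so dope = 32): for size == 8192 the
   constant path above emits roughly

       subq  $8224, %rsp        # 2*PROBE_INTERVAL + dope
       <probe at (%rsp)>
       subq  $4096, %rsp
       <probe at (%rsp)>
       addq  $4128, %rsp        # PROBE_INTERVAL + dope

   leaving %rsp lower by exactly 8192, with the deepest probe landing
   PROBE_INTERVAL + dope bytes past the requested size, which is the
   protection area described at the top of this function.  */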
10116 /* Adjust the stack pointer up to REG while probing it. */
10118 const char *
10119 output_adjust_stack_and_probe (rtx reg)
10121 static int labelno = 0;
10122 char loop_lab[32], end_lab[32];
10123 rtx xops[2];
10125 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10126 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10128 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10130 /* Jump to END_LAB if SP == LAST_ADDR. */
10131 xops[0] = stack_pointer_rtx;
10132 xops[1] = reg;
10133 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10134 fputs ("\tje\t", asm_out_file);
10135 assemble_name_raw (asm_out_file, end_lab);
10136 fputc ('\n', asm_out_file);
10138 /* SP = SP + PROBE_INTERVAL. */
10139 xops[1] = GEN_INT (PROBE_INTERVAL);
10140 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10142 /* Probe at SP. */
10143 xops[1] = const0_rtx;
10144 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10146 fprintf (asm_out_file, "\tjmp\t");
10147 assemble_name_raw (asm_out_file, loop_lab);
10148 fputc ('\n', asm_out_file);
10150 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10152 return "";
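/* For reference (a rough sketch, not generated verbatim by the
   templates above), the emitted loop looks something like this in
   64-bit AT&T syntax, with %r11 as the scratch register holding
   LAST_ADDR and local labels along the lines of .LPSRL0/.LPSRE0:

       .LPSRL0: cmpq %r11, %rsp
                je   .LPSRE0
                subq $4096, %rsp
                orq  $0, (%rsp)
                jmp  .LPSRL0
       .LPSRE0:                                                       */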
10155 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10156 inclusive. These are offsets from the current stack pointer. */
10158 static void
10159 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10161 /* See if we have a constant small number of probes to generate. If so,
10162 that's the easy case. The run-time loop is made up of 7 insns in the
10163 generic case while the compile-time loop is made up of n insns for n #
10164 of intervals. */
10165 if (size <= 7 * PROBE_INTERVAL)
10167 HOST_WIDE_INT i;
10169 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10170 it exceeds SIZE. If only one probe is needed, this will not
10171 generate any code. Then probe at FIRST + SIZE. */
10172 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10173 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
10175 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
10178 /* Otherwise, do the same as above, but in a loop. Note that we must be
10179 extra careful with variables wrapping around because we might be at
10180 the very top (or the very bottom) of the address space and we have
10181 to be able to handle this case properly; in particular, we use an
10182 equality test for the loop condition. */
10183 else
10185 HOST_WIDE_INT rounded_size, last;
10186 struct scratch_reg sr;
10188 get_scratch_register_on_entry (&sr);
10191 /* Step 1: round SIZE to the previous multiple of the interval. */
10193 rounded_size = size & -PROBE_INTERVAL;
10196 /* Step 2: compute initial and final value of the loop counter. */
10198 /* TEST_OFFSET = FIRST. */
10199 emit_move_insn (sr.reg, GEN_INT (-first));
10201 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10202 last = first + rounded_size;
10205 /* Step 3: the loop
10207 while (TEST_ADDR != LAST_ADDR)
10209 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10210 probe at TEST_ADDR
10213 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10214 until it is equal to ROUNDED_SIZE. */
10216 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10219 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10220 that SIZE is equal to ROUNDED_SIZE. */
10222 if (size != rounded_size)
10223 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
10224 stack_pointer_rtx,
10225 sr.reg),
10226 rounded_size - size));
10228 release_scratch_register_on_entry (&sr);
10231 /* Make sure nothing is scheduled before we are done. */
10232 emit_insn (gen_blockage ());
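/* Illustrative example (not from the original source): with a
   4096-byte PROBE_INTERVAL, FIRST == STACK_CHECK_PROTECT and
   SIZE == 12288, the constant path above emits three probes, at
   sp - (FIRST + 4096), sp - (FIRST + 8192) and sp - (FIRST + 12288),
   without ever moving the stack pointer.  */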
10235 /* Probe a range of stack addresses from REG to END, inclusive. These are
10236 offsets from the current stack pointer. */
10238 const char *
10239 output_probe_stack_range (rtx reg, rtx end)
10241 static int labelno = 0;
10242 char loop_lab[32], end_lab[32];
10243 rtx xops[3];
10245 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10246 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10248 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10250 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10251 xops[0] = reg;
10252 xops[1] = end;
10253 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10254 fputs ("\tje\t", asm_out_file);
10255 assemble_name_raw (asm_out_file, end_lab);
10256 fputc ('\n', asm_out_file);
10258 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10259 xops[1] = GEN_INT (PROBE_INTERVAL);
10260 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10262 /* Probe at TEST_ADDR. */
10263 xops[0] = stack_pointer_rtx;
10264 xops[1] = reg;
10265 xops[2] = const0_rtx;
10266 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10268 fprintf (asm_out_file, "\tjmp\t");
10269 assemble_name_raw (asm_out_file, loop_lab);
10270 fputc ('\n', asm_out_file);
10272 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10274 return "";
10277 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10278 to be generated in correct form. */
10279 static void
10280 ix86_finalize_stack_realign_flags (void)
10282 /* Check if stack realignment is really needed after reload, and
10283 store the result in cfun. */
10284 unsigned int incoming_stack_boundary
10285 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10286 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10287 unsigned int stack_realign = (incoming_stack_boundary
10288 < (current_function_is_leaf
10289 ? crtl->max_used_stack_slot_alignment
10290 : crtl->stack_alignment_needed));
10292 if (crtl->stack_realign_finalized)
10294 /* After stack_realign_needed is finalized, we can no longer
10295 change it. */
10296 gcc_assert (crtl->stack_realign_needed == stack_realign);
10298 else
10300 crtl->stack_realign_needed = stack_realign;
10301 crtl->stack_realign_finalized = true;
10305 /* Expand the prologue into a bunch of separate insns. */
10307 void
10308 ix86_expand_prologue (void)
10310 struct machine_function *m = cfun->machine;
10311 rtx insn, t;
10312 bool pic_reg_used;
10313 struct ix86_frame frame;
10314 HOST_WIDE_INT allocate;
10315 bool int_registers_saved;
10317 ix86_finalize_stack_realign_flags ();
10319 /* DRAP should not coexist with stack_realign_fp */
10320 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10322 memset (&m->fs, 0, sizeof (m->fs));
10324 /* Initialize CFA state for before the prologue. */
10325 m->fs.cfa_reg = stack_pointer_rtx;
10326 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10328 /* Track SP offset to the CFA. We continue tracking this after we've
10329 swapped the CFA register away from SP. In the case of re-alignment
10330 this is fudged; we're interested in offsets within the local frame. */
10331 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10332 m->fs.sp_valid = true;
10334 ix86_compute_frame_layout (&frame);
10336 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10338 /* We should have already generated an error for any use of
10339 ms_hook on a nested function. */
10340 gcc_checking_assert (!ix86_static_chain_on_stack);
10342 /* Check if profiling is active and whether we shall use the
10343 profiling-before-prologue variant. If so, sorry. */
10344 if (crtl->profile && flag_fentry != 0)
10345 sorry ("ms_hook_prologue attribute isn%'t compatible "
10346 "with -mfentry for 32-bit");
10348 /* In ix86_asm_output_function_label we emitted:
10349 8b ff movl.s %edi,%edi
10350 55 push %ebp
10351 8b ec movl.s %esp,%ebp
10353 This matches the hookable function prologue in Win32 API
10354 functions in Microsoft Windows XP Service Pack 2 and newer.
10355 Wine uses this to enable Windows apps to hook the Win32 API
10356 functions provided by Wine.
10358 What that means is that we've already set up the frame pointer. */
10360 if (frame_pointer_needed
10361 && !(crtl->drap_reg && crtl->stack_realign_needed))
10363 rtx push, mov;
10365 /* We've decided to use the frame pointer already set up.
10366 Describe this to the unwinder by pretending that both
10367 push and mov insns happen right here.
10369 Putting the unwind info here at the end of the ms_hook
10370 is done so that we can make absolutely certain we get
10371 the required byte sequence at the start of the function,
10372 rather than relying on an assembler that can produce
10373 the exact encoding required.
10375 However it does mean (in the unpatched case) that we have
10376 a 1 insn window where the asynchronous unwind info is
10377 incorrect. However, if we placed the unwind info at
10378 its correct location we would have incorrect unwind info
10379 in the patched case. Which is probably all moot since
10380 I don't expect Wine to generate dwarf2 unwind info for the
10381 system libraries that use this feature. */
10383 insn = emit_insn (gen_blockage ());
10385 push = gen_push (hard_frame_pointer_rtx);
10386 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10387 stack_pointer_rtx);
10388 RTX_FRAME_RELATED_P (push) = 1;
10389 RTX_FRAME_RELATED_P (mov) = 1;
10391 RTX_FRAME_RELATED_P (insn) = 1;
10392 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10393 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10395 /* Note that gen_push incremented m->fs.cfa_offset, even
10396 though we didn't emit the push insn here. */
10397 m->fs.cfa_reg = hard_frame_pointer_rtx;
10398 m->fs.fp_offset = m->fs.cfa_offset;
10399 m->fs.fp_valid = true;
10401 else
10403 /* The frame pointer is not needed so pop %ebp again.
10404 This leaves us with a pristine state. */
10405 emit_insn (gen_pop (hard_frame_pointer_rtx));
10409 /* The first insn of a function that accepts its static chain on the
10410 stack is to push the register that would be filled in by a direct
10411 call. This insn will be skipped by the trampoline. */
10412 else if (ix86_static_chain_on_stack)
10414 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10415 emit_insn (gen_blockage ());
10417 /* We don't want to interpret this push insn as a register save,
10418 only as a stack adjustment. The real copy of the register as
10419 a save will be done later, if needed. */
10420 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10421 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10422 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10423 RTX_FRAME_RELATED_P (insn) = 1;
10426 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10427 DRAP is needed and stack realignment is really needed after reload. */
10428 if (stack_realign_drap)
10430 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10432 /* Only need to push parameter pointer reg if it is caller saved. */
10433 if (!call_used_regs[REGNO (crtl->drap_reg)])
10435 /* Push arg pointer reg */
10436 insn = emit_insn (gen_push (crtl->drap_reg));
10437 RTX_FRAME_RELATED_P (insn) = 1;
10440 /* Grab the argument pointer. */
10441 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10442 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10443 RTX_FRAME_RELATED_P (insn) = 1;
10444 m->fs.cfa_reg = crtl->drap_reg;
10445 m->fs.cfa_offset = 0;
10447 /* Align the stack. */
10448 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10449 stack_pointer_rtx,
10450 GEN_INT (-align_bytes)));
10451 RTX_FRAME_RELATED_P (insn) = 1;
10453 /* Replicate the return address on the stack so that the return
10454 address can be reached via the (argp - 1) slot. This is needed
10455 to implement the macro RETURN_ADDR_RTX and the intrinsic function
10456 expand_builtin_return_addr, etc. */
10457 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10458 t = gen_frame_mem (Pmode, t);
10459 insn = emit_insn (gen_push (t));
10460 RTX_FRAME_RELATED_P (insn) = 1;
10462 /* For the purposes of frame and register save area addressing,
10463 we've started over with a new frame. */
10464 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10465 m->fs.realigned = true;
10468 if (frame_pointer_needed && !m->fs.fp_valid)
10470 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10471 slower on all targets. Also sdb doesn't like it. */
10472 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10473 RTX_FRAME_RELATED_P (insn) = 1;
10475 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10477 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10478 RTX_FRAME_RELATED_P (insn) = 1;
10480 if (m->fs.cfa_reg == stack_pointer_rtx)
10481 m->fs.cfa_reg = hard_frame_pointer_rtx;
10482 m->fs.fp_offset = m->fs.sp_offset;
10483 m->fs.fp_valid = true;
10487 int_registers_saved = (frame.nregs == 0);
10489 if (!int_registers_saved)
10491 /* If saving registers via PUSH, do so now. */
10492 if (!frame.save_regs_using_mov)
10494 ix86_emit_save_regs ();
10495 int_registers_saved = true;
10496 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10499 /* When using the red zone we may start register saving before allocating
10500 the stack frame, saving one cycle of the prologue. However, avoid
10501 doing this if we have to probe the stack; at least on x86_64 the
10502 stack probe can turn into a call that clobbers a red zone location. */
10503 else if (ix86_using_red_zone ()
10504 && (! TARGET_STACK_PROBE
10505 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10507 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10508 int_registers_saved = true;
10512 if (stack_realign_fp)
10514 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10515 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10517 /* The computation of the size of the re-aligned stack frame means
10518 that we must allocate the size of the register save area before
10519 performing the actual alignment. Otherwise we cannot guarantee
10520 that there's enough storage above the realignment point. */
10521 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10522 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10523 GEN_INT (m->fs.sp_offset
10524 - frame.sse_reg_save_offset),
10525 -1, false);
10527 /* Align the stack. */
10528 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10529 stack_pointer_rtx,
10530 GEN_INT (-align_bytes)));
10532 /* For the purposes of register save area addressing, the stack
10533 pointer is no longer valid. As for the value of sp_offset,
10534 see ix86_compute_frame_layout, which we need to match in order
10535 to pass verification of stack_pointer_offset at the end. */
10536 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10537 m->fs.sp_valid = false;
10540 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10542 if (flag_stack_usage_info)
10544 /* We start to count from ARG_POINTER. */
10545 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10547 /* If it was realigned, take into account the fake frame. */
10548 if (stack_realign_drap)
10550 if (ix86_static_chain_on_stack)
10551 stack_size += UNITS_PER_WORD;
10553 if (!call_used_regs[REGNO (crtl->drap_reg)])
10554 stack_size += UNITS_PER_WORD;
10556 /* This over-estimates by 1 minimal-stack-alignment-unit but
10557 mitigates that by counting in the new return address slot. */
10558 current_function_dynamic_stack_size
10559 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10562 current_function_static_stack_size = stack_size;
10565 /* The stack has already been decremented by the instruction calling us
10566 so probe if the size is non-negative to preserve the protection area. */
10567 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10569 /* We expect the registers to be saved when probes are used. */
10570 gcc_assert (int_registers_saved);
10572 if (STACK_CHECK_MOVING_SP)
10574 ix86_adjust_stack_and_probe (allocate);
10575 allocate = 0;
10577 else
10579 HOST_WIDE_INT size = allocate;
10581 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10582 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10584 if (TARGET_STACK_PROBE)
10585 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10586 else
10587 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10591 if (allocate == 0)
10593 else if (!ix86_target_stack_probe ()
10594 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10596 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10597 GEN_INT (-allocate), -1,
10598 m->fs.cfa_reg == stack_pointer_rtx);
10600 else
10602 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10603 rtx r10 = NULL;
10604 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10606 bool eax_live = false;
10607 bool r10_live = false;
10609 if (TARGET_64BIT)
10610 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10611 if (!TARGET_64BIT_MS_ABI)
10612 eax_live = ix86_eax_live_at_start_p ();
10614 if (eax_live)
10616 emit_insn (gen_push (eax));
10617 allocate -= UNITS_PER_WORD;
10619 if (r10_live)
10621 r10 = gen_rtx_REG (Pmode, R10_REG);
10622 emit_insn (gen_push (r10));
10623 allocate -= UNITS_PER_WORD;
10626 emit_move_insn (eax, GEN_INT (allocate));
10627 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10629 /* Use the fact that AX still contains ALLOCATE. */
10630 adjust_stack_insn = (TARGET_64BIT
10631 ? gen_pro_epilogue_adjust_stack_di_sub
10632 : gen_pro_epilogue_adjust_stack_si_sub);
10634 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10635 stack_pointer_rtx, eax));
10637 /* Note that SEH directives need to continue tracking the stack
10638 pointer even after the frame pointer has been set up. */
10639 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10641 if (m->fs.cfa_reg == stack_pointer_rtx)
10642 m->fs.cfa_offset += allocate;
10644 RTX_FRAME_RELATED_P (insn) = 1;
10645 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10646 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10647 plus_constant (stack_pointer_rtx,
10648 -allocate)));
10650 m->fs.sp_offset += allocate;
10652 if (r10_live && eax_live)
10654 t = choose_baseaddr (m->fs.sp_offset - allocate);
10655 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10656 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10657 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10659 else if (eax_live || r10_live)
10661 t = choose_baseaddr (m->fs.sp_offset - allocate);
10662 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10665 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10667 /* If we haven't already set up the frame pointer, do so now. */
10668 if (frame_pointer_needed && !m->fs.fp_valid)
10670 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10671 GEN_INT (frame.stack_pointer_offset
10672 - frame.hard_frame_pointer_offset));
10673 insn = emit_insn (insn);
10674 RTX_FRAME_RELATED_P (insn) = 1;
10675 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10677 if (m->fs.cfa_reg == stack_pointer_rtx)
10678 m->fs.cfa_reg = hard_frame_pointer_rtx;
10679 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10680 m->fs.fp_valid = true;
10683 if (!int_registers_saved)
10684 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10685 if (frame.nsseregs)
10686 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10688 pic_reg_used = false;
10689 if (pic_offset_table_rtx
10690 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10691 || crtl->profile))
10693 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10695 if (alt_pic_reg_used != INVALID_REGNUM)
10696 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10698 pic_reg_used = true;
10701 if (pic_reg_used)
10703 if (TARGET_64BIT)
10705 if (ix86_cmodel == CM_LARGE_PIC)
10707 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10708 rtx label = gen_label_rtx ();
10709 emit_label (label);
10710 LABEL_PRESERVE_P (label) = 1;
10711 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10712 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10713 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10714 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10715 pic_offset_table_rtx, tmp_reg));
10717 else
10718 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10720 else
10721 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10724 /* In the pic_reg_used case, make sure that the got load isn't deleted
10725 when mcount needs it. Blockage to avoid call movement across mcount
10726 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10727 note. */
10728 if (crtl->profile && !flag_fentry && pic_reg_used)
10729 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10731 if (crtl->drap_reg && !crtl->stack_realign_needed)
10733 /* vDRAP is set up, but after reload it turns out stack realignment
10734 isn't necessary; here we emit prologue code to set up DRAP
10735 without the stack realign adjustment. */
10736 t = choose_baseaddr (0);
10737 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10740 /* Prevent instructions from being scheduled into the register save push
10741 sequence when access to the redzone area is done through the frame pointer.
10742 The offset between the frame pointer and the stack pointer is calculated
10743 relative to the value of the stack pointer at the end of the function
10744 prologue, and moving instructions that access the redzone area via the frame
10745 pointer inside the push sequence violates this assumption. */
10746 if (frame_pointer_needed && frame.red_zone_size)
10747 emit_insn (gen_memory_blockage ());
10749 /* Emit cld instruction if stringops are used in the function. */
10750 if (TARGET_CLD && ix86_current_function_needs_cld)
10751 emit_insn (gen_cld ());
10753 /* SEH requires that the prologue end within 256 bytes of the start of
10754 the function. Prevent instruction schedules that would extend that. */
10755 if (TARGET_SEH)
10756 emit_insn (gen_blockage ());
10759 /* Emit code to restore REG using a POP insn. */
10761 static void
10762 ix86_emit_restore_reg_using_pop (rtx reg)
10764 struct machine_function *m = cfun->machine;
10765 rtx insn = emit_insn (gen_pop (reg));
10767 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10768 m->fs.sp_offset -= UNITS_PER_WORD;
10770 if (m->fs.cfa_reg == crtl->drap_reg
10771 && REGNO (reg) == REGNO (crtl->drap_reg))
10773 /* Previously we'd represented the CFA as an expression
10774 like *(%ebp - 8). We've just popped that value from
10775 the stack, which means we need to reset the CFA to
10776 the drap register. This will remain until we restore
10777 the stack pointer. */
10778 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10779 RTX_FRAME_RELATED_P (insn) = 1;
10781 /* This means that the DRAP register is valid for addressing too. */
10782 m->fs.drap_valid = true;
10783 return;
10786 if (m->fs.cfa_reg == stack_pointer_rtx)
10788 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10789 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10790 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10791 RTX_FRAME_RELATED_P (insn) = 1;
10793 m->fs.cfa_offset -= UNITS_PER_WORD;
10796 /* When the frame pointer is the CFA, and we pop it, we are
10797 swapping back to the stack pointer as the CFA. This happens
10798 for stack frames that don't allocate other data, so we assume
10799 the stack pointer is now pointing at the return address, i.e.
10800 the function entry state, which makes the offset 1 word. */
10801 if (reg == hard_frame_pointer_rtx)
10803 m->fs.fp_valid = false;
10804 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10806 m->fs.cfa_reg = stack_pointer_rtx;
10807 m->fs.cfa_offset -= UNITS_PER_WORD;
10809 add_reg_note (insn, REG_CFA_DEF_CFA,
10810 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10811 GEN_INT (m->fs.cfa_offset)));
10812 RTX_FRAME_RELATED_P (insn) = 1;
10817 /* Emit code to restore saved registers using POP insns. */
10819 static void
10820 ix86_emit_restore_regs_using_pop (void)
10822 unsigned int regno;
10824 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10825 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10826 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10829 /* Emit code and notes for the LEAVE instruction. */
10831 static void
10832 ix86_emit_leave (void)
10834 struct machine_function *m = cfun->machine;
10835 rtx insn = emit_insn (ix86_gen_leave ());
10837 ix86_add_queued_cfa_restore_notes (insn);
10839 gcc_assert (m->fs.fp_valid);
10840 m->fs.sp_valid = true;
10841 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10842 m->fs.fp_valid = false;
10844 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10846 m->fs.cfa_reg = stack_pointer_rtx;
10847 m->fs.cfa_offset = m->fs.sp_offset;
10849 add_reg_note (insn, REG_CFA_DEF_CFA,
10850 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10851 RTX_FRAME_RELATED_P (insn) = 1;
10852 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10853 m->fs.fp_offset);
10857 /* Emit code to restore saved registers using MOV insns.
10858 First register is restored from CFA - CFA_OFFSET. */
10859 static void
10860 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10861 bool maybe_eh_return)
10863 struct machine_function *m = cfun->machine;
10864 unsigned int regno;
10866 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10867 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10869 rtx reg = gen_rtx_REG (Pmode, regno);
10870 rtx insn, mem;
10872 mem = choose_baseaddr (cfa_offset);
10873 mem = gen_frame_mem (Pmode, mem);
10874 insn = emit_move_insn (reg, mem);
10876 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10878 /* Previously we'd represented the CFA as an expression
10879 like *(%ebp - 8). We've just popped that value from
10880 the stack, which means we need to reset the CFA to
10881 the drap register. This will remain until we restore
10882 the stack pointer. */
10883 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10884 RTX_FRAME_RELATED_P (insn) = 1;
10886 /* This means that the DRAP register is valid for addressing. */
10887 m->fs.drap_valid = true;
10889 else
10890 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10892 cfa_offset -= UNITS_PER_WORD;
10896 /* Emit code to restore saved registers using MOV insns.
10897 First register is restored from CFA - CFA_OFFSET. */
10898 static void
10899 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10900 bool maybe_eh_return)
10902 unsigned int regno;
10904 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10905 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10907 rtx reg = gen_rtx_REG (V4SFmode, regno);
10908 rtx mem;
10910 mem = choose_baseaddr (cfa_offset);
10911 mem = gen_rtx_MEM (V4SFmode, mem);
10912 set_mem_align (mem, 128);
10913 emit_move_insn (reg, mem);
10915 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10917 cfa_offset -= 16;
10921 /* Restore function stack, frame, and registers. */
10923 void
10924 ix86_expand_epilogue (int style)
10926 struct machine_function *m = cfun->machine;
10927 struct machine_frame_state frame_state_save = m->fs;
10928 struct ix86_frame frame;
10929 bool restore_regs_via_mov;
10930 bool using_drap;
10932 ix86_finalize_stack_realign_flags ();
10933 ix86_compute_frame_layout (&frame);
10935 m->fs.sp_valid = (!frame_pointer_needed
10936 || (current_function_sp_is_unchanging
10937 && !stack_realign_fp));
10938 gcc_assert (!m->fs.sp_valid
10939 || m->fs.sp_offset == frame.stack_pointer_offset);
10941 /* The FP must be valid if the frame pointer is present. */
10942 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10943 gcc_assert (!m->fs.fp_valid
10944 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10946 /* We must have *some* valid pointer to the stack frame. */
10947 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10949 /* The DRAP is never valid at this point. */
10950 gcc_assert (!m->fs.drap_valid);
10952 /* See the comment about red zone and frame
10953 pointer usage in ix86_expand_prologue. */
10954 if (frame_pointer_needed && frame.red_zone_size)
10955 emit_insn (gen_memory_blockage ());
10957 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10958 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10960 /* Determine the CFA offset of the end of the red-zone. */
10961 m->fs.red_zone_offset = 0;
10962 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10964 /* The red-zone begins below the return address. */
10965 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10967 /* When the register save area is in the aligned portion of
10968 the stack, determine the maximum runtime displacement that
10969 matches up with the aligned frame. */
10970 if (stack_realign_drap)
10971 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10972 + UNITS_PER_WORD);
10975 /* Special care must be taken for the normal return case of a function
10976 using eh_return: the eax and edx registers are marked as saved, but
10977 not restored along this path. Adjust the save location to match. */
10978 if (crtl->calls_eh_return && style != 2)
10979 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10981 /* EH_RETURN requires the use of moves to function properly. */
10982 if (crtl->calls_eh_return)
10983 restore_regs_via_mov = true;
10984 /* SEH requires the use of pops to identify the epilogue. */
10985 else if (TARGET_SEH)
10986 restore_regs_via_mov = false;
10987 /* If we're only restoring one register and sp is not valid then
10988 use a move instruction to restore the register, since it's
10989 less work than reloading sp and popping the register. */
10990 else if (!m->fs.sp_valid && frame.nregs <= 1)
10991 restore_regs_via_mov = true;
10992 else if (TARGET_EPILOGUE_USING_MOVE
10993 && cfun->machine->use_fast_prologue_epilogue
10994 && (frame.nregs > 1
10995 || m->fs.sp_offset != frame.reg_save_offset))
10996 restore_regs_via_mov = true;
10997 else if (frame_pointer_needed
10998 && !frame.nregs
10999 && m->fs.sp_offset != frame.reg_save_offset)
11000 restore_regs_via_mov = true;
11001 else if (frame_pointer_needed
11002 && TARGET_USE_LEAVE
11003 && cfun->machine->use_fast_prologue_epilogue
11004 && frame.nregs == 1)
11005 restore_regs_via_mov = true;
11006 else
11007 restore_regs_via_mov = false;
11009 if (restore_regs_via_mov || frame.nsseregs)
11011 /* Ensure that the entire register save area is addressable via
11012 the stack pointer, if we will restore via sp. */
11013 if (TARGET_64BIT
11014 && m->fs.sp_offset > 0x7fffffff
11015 && !(m->fs.fp_valid || m->fs.drap_valid)
11016 && (frame.nsseregs + frame.nregs) != 0)
11018 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11019 GEN_INT (m->fs.sp_offset
11020 - frame.sse_reg_save_offset),
11021 style,
11022 m->fs.cfa_reg == stack_pointer_rtx);
11026 /* If there are any SSE registers to restore, then we have to do it
11027 via moves, since there's obviously no pop for SSE regs. */
11028 if (frame.nsseregs)
11029 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11030 style == 2);
11032 if (restore_regs_via_mov)
11034 rtx t;
11036 if (frame.nregs)
11037 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11039 /* eh_return epilogues need %ecx added to the stack pointer. */
11040 if (style == 2)
11042 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11044 /* Stack align doesn't work with eh_return. */
11045 gcc_assert (!stack_realign_drap);
11046 /* Neither do regparm nested functions. */
11047 gcc_assert (!ix86_static_chain_on_stack);
11049 if (frame_pointer_needed)
11051 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11052 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
11053 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11055 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11056 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11058 /* Note that we use SA as a temporary CFA, as the return
11059 address is at the proper place relative to it. We
11060 pretend this happens at the FP restore insn because
11061 prior to this insn the FP would be stored at the wrong
11062 offset relative to SA, and after this insn we have no
11063 other reasonable register to use for the CFA. We don't
11064 bother resetting the CFA to the SP for the duration of
11065 the return insn. */
11066 add_reg_note (insn, REG_CFA_DEF_CFA,
11067 plus_constant (sa, UNITS_PER_WORD));
11068 ix86_add_queued_cfa_restore_notes (insn);
11069 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11070 RTX_FRAME_RELATED_P (insn) = 1;
11072 m->fs.cfa_reg = sa;
11073 m->fs.cfa_offset = UNITS_PER_WORD;
11074 m->fs.fp_valid = false;
11076 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11077 const0_rtx, style, false);
11079 else
11081 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11082 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
11083 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11084 ix86_add_queued_cfa_restore_notes (insn);
11086 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11087 if (m->fs.cfa_offset != UNITS_PER_WORD)
11089 m->fs.cfa_offset = UNITS_PER_WORD;
11090 add_reg_note (insn, REG_CFA_DEF_CFA,
11091 plus_constant (stack_pointer_rtx,
11092 UNITS_PER_WORD));
11093 RTX_FRAME_RELATED_P (insn) = 1;
11096 m->fs.sp_offset = UNITS_PER_WORD;
11097 m->fs.sp_valid = true;
11100 else
11102 /* SEH requires that the function end with (1) a stack adjustment
11103 if necessary, (2) a sequence of pops, and (3) a return or
11104 jump instruction. Prevent insns from the function body from
11105 being scheduled into this sequence. */
11106 if (TARGET_SEH)
11108 /* Prevent a catch region from being adjacent to the standard
11109 epilogue sequence. Unfortunately, crtl->uses_eh_lsda and
11110 several other flags that would be interesting to test are
11111 not yet set up. */
11112 if (flag_non_call_exceptions)
11113 emit_insn (gen_nops (const1_rtx));
11114 else
11115 emit_insn (gen_blockage ());
11118 /* First step is to deallocate the stack frame so that we can
11119 pop the registers. */
11120 if (!m->fs.sp_valid)
11122 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11123 GEN_INT (m->fs.fp_offset
11124 - frame.reg_save_offset),
11125 style, false);
11127 else if (m->fs.sp_offset != frame.reg_save_offset)
11129 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11130 GEN_INT (m->fs.sp_offset
11131 - frame.reg_save_offset),
11132 style,
11133 m->fs.cfa_reg == stack_pointer_rtx);
11136 ix86_emit_restore_regs_using_pop ();
11139 /* If we used a frame pointer and haven't already got rid of it,
11140 then do so now. */
11141 if (m->fs.fp_valid)
11143 /* If the stack pointer is valid and pointing at the frame
11144 pointer store address, then we only need a pop. */
11145 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11146 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11147 /* Leave results in shorter dependency chains on CPUs that are
11148 able to grok it fast. */
11149 else if (TARGET_USE_LEAVE
11150 || optimize_function_for_size_p (cfun)
11151 || !cfun->machine->use_fast_prologue_epilogue)
11152 ix86_emit_leave ();
11153 else
11155 pro_epilogue_adjust_stack (stack_pointer_rtx,
11156 hard_frame_pointer_rtx,
11157 const0_rtx, style, !using_drap);
11158 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11162 if (using_drap)
11164 int param_ptr_offset = UNITS_PER_WORD;
11165 rtx insn;
11167 gcc_assert (stack_realign_drap);
11169 if (ix86_static_chain_on_stack)
11170 param_ptr_offset += UNITS_PER_WORD;
11171 if (!call_used_regs[REGNO (crtl->drap_reg)])
11172 param_ptr_offset += UNITS_PER_WORD;
11174 insn = emit_insn (gen_rtx_SET
11175 (VOIDmode, stack_pointer_rtx,
11176 gen_rtx_PLUS (Pmode,
11177 crtl->drap_reg,
11178 GEN_INT (-param_ptr_offset))));
11179 m->fs.cfa_reg = stack_pointer_rtx;
11180 m->fs.cfa_offset = param_ptr_offset;
11181 m->fs.sp_offset = param_ptr_offset;
11182 m->fs.realigned = false;
11184 add_reg_note (insn, REG_CFA_DEF_CFA,
11185 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11186 GEN_INT (param_ptr_offset)));
11187 RTX_FRAME_RELATED_P (insn) = 1;
11189 if (!call_used_regs[REGNO (crtl->drap_reg)])
11190 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11193 /* At this point the stack pointer must be valid, and we must have
11194 restored all of the registers. We may not have deallocated the
11195 entire stack frame. We've delayed this until now because it may
11196 be possible to merge the local stack deallocation with the
11197 deallocation forced by ix86_static_chain_on_stack. */
11198 gcc_assert (m->fs.sp_valid);
11199 gcc_assert (!m->fs.fp_valid);
11200 gcc_assert (!m->fs.realigned);
11201 if (m->fs.sp_offset != UNITS_PER_WORD)
11203 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11204 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11205 style, true);
11208 /* Sibcall epilogues don't want a return instruction. */
11209 if (style == 0)
11211 m->fs = frame_state_save;
11212 return;
11215 /* Emit vzeroupper if needed. */
11216 if (TARGET_VZEROUPPER
11217 && !TREE_THIS_VOLATILE (cfun->decl)
11218 && !cfun->machine->caller_return_avx256_p)
11219 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
11221 if (crtl->args.pops_args && crtl->args.size)
11223 rtx popc = GEN_INT (crtl->args.pops_args);
11225 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
11226 address, do an explicit add, and jump indirectly to the caller. */
11228 if (crtl->args.pops_args >= 65536)
11230 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11231 rtx insn;
11233 /* There is no "pascal" calling convention in any 64bit ABI. */
11234 gcc_assert (!TARGET_64BIT);
11236 insn = emit_insn (gen_pop (ecx));
11237 m->fs.cfa_offset -= UNITS_PER_WORD;
11238 m->fs.sp_offset -= UNITS_PER_WORD;
11240 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11241 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11242 add_reg_note (insn, REG_CFA_REGISTER,
11243 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11244 RTX_FRAME_RELATED_P (insn) = 1;
11246 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11247 popc, -1, true);
11248 emit_jump_insn (gen_return_indirect_internal (ecx));
11250 else
11251 emit_jump_insn (gen_return_pop_internal (popc));
11253 else
11254 emit_jump_insn (gen_return_internal ());
11256 /* Restore the state back to the state from the prologue,
11257 so that it's correct for the next epilogue. */
11258 m->fs = frame_state_save;
11261 /* Reset from the function's potential modifications. */
11263 static void
11264 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11265 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11267 if (pic_offset_table_rtx)
11268 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11269 #if TARGET_MACHO
11270 /* Mach-O doesn't support labels at the end of objects, so if
11271 it looks like we might want one, insert a NOP. */
11273 rtx insn = get_last_insn ();
11274 while (insn
11275 && NOTE_P (insn)
11276 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11277 insn = PREV_INSN (insn);
11278 if (insn
11279 && (LABEL_P (insn)
11280 || (NOTE_P (insn)
11281 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11282 fputs ("\tnop\n", file);
11284 #endif
11288 /* Return a scratch register to use in the split stack prologue. The
11289 split stack prologue is used for -fsplit-stack. It consists of the first
11290 instructions in the function, even before the regular prologue.
11291 The scratch register can be any caller-saved register which is not
11292 used for parameters or for the static chain. */
11294 static unsigned int
11295 split_stack_prologue_scratch_regno (void)
11297 if (TARGET_64BIT)
11298 return R11_REG;
11299 else
11301 bool is_fastcall;
11302 int regparm;
11304 is_fastcall = (lookup_attribute ("fastcall",
11305 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11306 != NULL);
11307 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11309 if (is_fastcall)
11311 if (DECL_STATIC_CHAIN (cfun->decl))
11313 sorry ("-fsplit-stack does not support fastcall with "
11314 "nested function");
11315 return INVALID_REGNUM;
11317 return AX_REG;
11319 else if (regparm < 3)
11321 if (!DECL_STATIC_CHAIN (cfun->decl))
11322 return CX_REG;
11323 else
11325 if (regparm >= 2)
11327 sorry ("-fsplit-stack does not support 2 register "
11328 " parameters for a nested function");
11329 return INVALID_REGNUM;
11331 return DX_REG;
11334 else
11336 /* FIXME: We could make this work by pushing a register
11337 around the addition and comparison. */
11338 sorry ("-fsplit-stack does not support 3 register parameters");
11339 return INVALID_REGNUM;
11344 /* A SYMBOL_REF for the function which allocates new stack space for
11345 -fsplit-stack. */
11347 static GTY(()) rtx split_stack_fn;
11349 /* A SYMBOL_REF for the more stack function when using the large
11350 model. */
11352 static GTY(()) rtx split_stack_fn_large;
11354 /* Handle -fsplit-stack. These are the first instructions in the
11355 function, even before the regular prologue. */
11357 void
11358 ix86_expand_split_stack_prologue (void)
11360 struct ix86_frame frame;
11361 HOST_WIDE_INT allocate;
11362 unsigned HOST_WIDE_INT args_size;
11363 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11364 rtx scratch_reg = NULL_RTX;
11365 rtx varargs_label = NULL_RTX;
11366 rtx fn;
11368 gcc_assert (flag_split_stack && reload_completed);
11370 ix86_finalize_stack_realign_flags ();
11371 ix86_compute_frame_layout (&frame);
11372 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11374 /* This is the label we will branch to if we have enough stack
11375 space. We expect the basic block reordering pass to reverse this
11376 branch if optimizing, so that we branch in the unlikely case. */
11377 label = gen_label_rtx ();
11379 /* We need to compare the stack pointer minus the frame size with
11380 the stack boundary in the TCB. The stack boundary always gives
11381 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11382 can compare directly. Otherwise we need to do an addition. */
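/* A rough sketch of the fast-path check this expands to on x86-64 when
   the frame exceeds SPLIT_STACK_AVAILABLE (assumed names; the boundary
   slot is whatever the UNSPEC_STACK_CHECK address resolves to in the TCB):
       lea  -ALLOCATE(%rsp), %r11
       cmp  %fs:BOUNDARY_OFFSET, %r11
       jae  .Lenough_stack        # the common, likely-taken case
   with the __morestack call emitted below on the fallthrough path. */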
11384 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11385 UNSPEC_STACK_CHECK);
11386 limit = gen_rtx_CONST (Pmode, limit);
11387 limit = gen_rtx_MEM (Pmode, limit);
11388 if (allocate < SPLIT_STACK_AVAILABLE)
11389 current = stack_pointer_rtx;
11390 else
11392 unsigned int scratch_regno;
11393 rtx offset;
11395 /* We need a scratch register to hold the stack pointer minus
11396 the required frame size. Since this is the very start of the
11397 function, the scratch register can be any caller-saved
11398 register which is not used for parameters. */
11399 offset = GEN_INT (- allocate);
11400 scratch_regno = split_stack_prologue_scratch_regno ();
11401 if (scratch_regno == INVALID_REGNUM)
11402 return;
11403 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11404 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11406 /* We don't use ix86_gen_add3 in this case because it will
11407 want to split to lea, but when not optimizing the insn
11408 will not be split after this point. */
11409 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11410 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11411 offset)));
11413 else
11415 emit_move_insn (scratch_reg, offset);
11416 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11417 stack_pointer_rtx));
11419 current = scratch_reg;
11422 ix86_expand_branch (GEU, current, limit, label);
11423 jump_insn = get_last_insn ();
11424 JUMP_LABEL (jump_insn) = label;
11426 /* Mark the jump as very likely to be taken. */
11427 add_reg_note (jump_insn, REG_BR_PROB,
11428 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
11430 if (split_stack_fn == NULL_RTX)
11431 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11432 fn = split_stack_fn;
11434 /* Get more stack space. We pass in the desired stack space and the
11435 size of the arguments to copy to the new stack. In 32-bit mode
11436 we push the parameters; __morestack will return on a new stack
11437 anyhow. In 64-bit mode we pass the parameters in r10 and
11438 r11. */
11439 allocate_rtx = GEN_INT (allocate);
11440 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11441 call_fusage = NULL_RTX;
11442 if (TARGET_64BIT)
11444 rtx reg10, reg11;
11446 reg10 = gen_rtx_REG (Pmode, R10_REG);
11447 reg11 = gen_rtx_REG (Pmode, R11_REG);
11449 /* If this function uses a static chain, it will be in %r10.
11450 Preserve it across the call to __morestack. */
11451 if (DECL_STATIC_CHAIN (cfun->decl))
11453 rtx rax;
11455 rax = gen_rtx_REG (Pmode, AX_REG);
11456 emit_move_insn (rax, reg10);
11457 use_reg (&call_fusage, rax);
11460 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11462 HOST_WIDE_INT argval;
11464 /* When using the large model we need to load the address
11465 into a register, and we've run out of registers. So we
11466 switch to a different calling convention, and we call a
11467 different function: __morestack_large_model. We pass the
11468 argument size in the upper 32 bits of r10 and pass the
11469 frame size in the lower 32 bits. */
11470 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11471 gcc_assert ((args_size & 0xffffffff) == args_size);
11473 if (split_stack_fn_large == NULL_RTX)
11474 split_stack_fn_large =
11475 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11477 if (ix86_cmodel == CM_LARGE_PIC)
11479 rtx label, x;
11481 label = gen_label_rtx ();
11482 emit_label (label);
11483 LABEL_PRESERVE_P (label) = 1;
11484 emit_insn (gen_set_rip_rex64 (reg10, label));
11485 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11486 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11487 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11488 UNSPEC_GOT);
11489 x = gen_rtx_CONST (Pmode, x);
11490 emit_move_insn (reg11, x);
11491 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11492 x = gen_const_mem (Pmode, x);
11493 emit_move_insn (reg11, x);
11495 else
11496 emit_move_insn (reg11, split_stack_fn_large);
11498 fn = reg11;
11500 argval = ((args_size << 16) << 16) + allocate;
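/* Worked example (assumed values): with args_size == 0x10 and
   allocate == 0x200, argval == 0x0000001000000200, i.e. the argument
   size lands in the upper 32 bits of %r10 and the frame size in the
   lower 32 bits, matching the __morestack_large_model convention
   described above. */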
11501 emit_move_insn (reg10, GEN_INT (argval));
11503 else
11505 emit_move_insn (reg10, allocate_rtx);
11506 emit_move_insn (reg11, GEN_INT (args_size));
11507 use_reg (&call_fusage, reg11);
11510 use_reg (&call_fusage, reg10);
11512 else
11514 emit_insn (gen_push (GEN_INT (args_size)));
11515 emit_insn (gen_push (allocate_rtx));
11517 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11518 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11519 NULL_RTX, false);
11520 add_function_usage_to (call_insn, call_fusage);
11522 /* In order to make call/return prediction work right, we now need
11523 to execute a return instruction. See
11524 libgcc/config/i386/morestack.S for the details on how this works.
11526 For flow purposes gcc must not see this as a return
11527 instruction--we need control flow to continue at the subsequent
11528 label. Therefore, we use an unspec. */
11529 gcc_assert (crtl->args.pops_args < 65536);
11530 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11532 /* If we are in 64-bit mode and this function uses a static chain,
11533 we saved %r10 in %rax before calling __morestack. */
11534 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11535 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11536 gen_rtx_REG (Pmode, AX_REG));
11538 /* If this function calls va_start, we need to store a pointer to
11539 the arguments on the old stack, because they may not have been
11540 all copied to the new stack. At this point the old stack can be
11541 found at the frame pointer value used by __morestack, because
11542 __morestack has set that up before calling back to us. Here we
11543 store that pointer in a scratch register, and in
11544 ix86_expand_prologue we store the scratch register in a stack
11545 slot. */
11546 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11548 unsigned int scratch_regno;
11549 rtx frame_reg;
11550 int words;
11552 scratch_regno = split_stack_prologue_scratch_regno ();
11553 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11554 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11556 /* 64-bit:
11557 fp -> old fp value
11558 return address within this function
11559 return address of caller of this function
11560 stack arguments
11561 So we add three words to get to the stack arguments.
11563 32-bit:
11564 fp -> old fp value
11565 return address within this function
11566 first argument to __morestack
11567 second argument to __morestack
11568 return address of caller of this function
11569 stack arguments
11570 So we add five words to get to the stack arguments.
11572 words = TARGET_64BIT ? 3 : 5;
11573 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11574 gen_rtx_PLUS (Pmode, frame_reg,
11575 GEN_INT (words * UNITS_PER_WORD))));
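/* For example, on x86-64 this sets the scratch register to fp + 3*8 == fp + 24,
   the address of the first stack argument in the layout above; in 32-bit
   mode it is fp + 5*4 == fp + 20. */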
11577 varargs_label = gen_label_rtx ();
11578 emit_jump_insn (gen_jump (varargs_label));
11579 JUMP_LABEL (get_last_insn ()) = varargs_label;
11581 emit_barrier ();
11584 emit_label (label);
11585 LABEL_NUSES (label) = 1;
11587 /* If this function calls va_start, we now have to set the scratch
11588 register for the case where we do not call __morestack. In this
11589 case we need to set it based on the stack pointer. */
11590 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11592 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11593 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11594 GEN_INT (UNITS_PER_WORD))));
11596 emit_label (varargs_label);
11597 LABEL_NUSES (varargs_label) = 1;
11601 /* We may have to tell the dataflow pass that the split stack prologue
11602 is initializing a scratch register. */
11604 static void
11605 ix86_live_on_entry (bitmap regs)
11607 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11609 gcc_assert (flag_split_stack);
11610 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11614 /* Extract the parts of an RTL expression that is a valid memory address
11615 for an instruction. Return 0 if the structure of the address is
11616 grossly off. Return -1 if the address contains ASHIFT, so it is not
11617 strictly valid, but still used for computing the length of the lea instruction. */
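/* Illustrative example (not from the sources): the canonical address
   (plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 8))
   decomposes into base = A, index = B, scale = 4, disp = 8, which is
   8(%eax,%ebx,4) in AT&T syntax when A is %eax and B is %ebx. */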
11619 int
11620 ix86_decompose_address (rtx addr, struct ix86_address *out)
11622 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11623 rtx base_reg, index_reg;
11624 HOST_WIDE_INT scale = 1;
11625 rtx scale_rtx = NULL_RTX;
11626 rtx tmp;
11627 int retval = 1;
11628 enum ix86_address_seg seg = SEG_DEFAULT;
11630 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
11631 base = addr;
11632 else if (GET_CODE (addr) == PLUS)
11634 rtx addends[4], op;
11635 int n = 0, i;
11637 op = addr;
11640 if (n >= 4)
11641 return 0;
11642 addends[n++] = XEXP (op, 1);
11643 op = XEXP (op, 0);
11645 while (GET_CODE (op) == PLUS);
11646 if (n >= 4)
11647 return 0;
11648 addends[n] = op;
11650 for (i = n; i >= 0; --i)
11652 op = addends[i];
11653 switch (GET_CODE (op))
11655 case MULT:
11656 if (index)
11657 return 0;
11658 index = XEXP (op, 0);
11659 scale_rtx = XEXP (op, 1);
11660 break;
11662 case ASHIFT:
11663 if (index)
11664 return 0;
11665 index = XEXP (op, 0);
11666 tmp = XEXP (op, 1);
11667 if (!CONST_INT_P (tmp))
11668 return 0;
11669 scale = INTVAL (tmp);
11670 if ((unsigned HOST_WIDE_INT) scale > 3)
11671 return 0;
11672 scale = 1 << scale;
11673 break;
11675 case UNSPEC:
11676 if (XINT (op, 1) == UNSPEC_TP
11677 && TARGET_TLS_DIRECT_SEG_REFS
11678 && seg == SEG_DEFAULT)
11679 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11680 else
11681 return 0;
11682 break;
11684 case REG:
11685 case SUBREG:
11686 if (!base)
11687 base = op;
11688 else if (!index)
11689 index = op;
11690 else
11691 return 0;
11692 break;
11694 case CONST:
11695 case CONST_INT:
11696 case SYMBOL_REF:
11697 case LABEL_REF:
11698 if (disp)
11699 return 0;
11700 disp = op;
11701 break;
11703 default:
11704 return 0;
11708 else if (GET_CODE (addr) == MULT)
11710 index = XEXP (addr, 0); /* index*scale */
11711 scale_rtx = XEXP (addr, 1);
11713 else if (GET_CODE (addr) == ASHIFT)
11715 /* We're called for lea too, which implements ashift on occasion. */
11716 index = XEXP (addr, 0);
11717 tmp = XEXP (addr, 1);
11718 if (!CONST_INT_P (tmp))
11719 return 0;
11720 scale = INTVAL (tmp);
11721 if ((unsigned HOST_WIDE_INT) scale > 3)
11722 return 0;
11723 scale = 1 << scale;
11724 retval = -1;
11726 else
11727 disp = addr; /* displacement */
11729 /* Extract the integral value of scale. */
11730 if (scale_rtx)
11732 if (!CONST_INT_P (scale_rtx))
11733 return 0;
11734 scale = INTVAL (scale_rtx);
11737 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11738 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11740 /* Avoid useless 0 displacement. */
11741 if (disp == const0_rtx && (base || index))
11742 disp = NULL_RTX;
11744 /* Allow arg pointer and stack pointer as index if there is no scaling. */
11745 if (base_reg && index_reg && scale == 1
11746 && (index_reg == arg_pointer_rtx
11747 || index_reg == frame_pointer_rtx
11748 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11750 rtx tmp;
11751 tmp = base, base = index, index = tmp;
11752 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11755 /* Special case: %ebp cannot be encoded as a base without a displacement.
11756 Similarly %r13. */
11757 if (!disp
11758 && base_reg
11759 && (base_reg == hard_frame_pointer_rtx
11760 || base_reg == frame_pointer_rtx
11761 || base_reg == arg_pointer_rtx
11762 || (REG_P (base_reg)
11763 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11764 || REGNO (base_reg) == R13_REG))))
11765 disp = const0_rtx;
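/* In the ModRM encoding, mod == 00 with r/m == 101 (EBP, or R13 with
   REX.B) means "disp32 with no base" (RIP-relative in 64-bit mode), so a
   zero displacement byte must be emitted instead; e.g. (%ebp) is really
   encoded as 0(%ebp). */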
11767 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
11768 Avoid this by transforming to [%esi+0].
11769 Reload calls address legitimization without cfun defined, so we need
11770 to test cfun for being non-NULL. */
11771 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11772 && base_reg && !index_reg && !disp
11773 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11774 disp = const0_rtx;
11776 /* Special case: encode reg+reg instead of reg*2. */
11777 if (!base && index && scale == 2)
11778 base = index, base_reg = index_reg, scale = 1;
11780 /* Special case: scaling cannot be encoded without base or displacement. */
11781 if (!base && !disp && index && scale != 1)
11782 disp = const0_rtx;
11784 out->base = base;
11785 out->index = index;
11786 out->disp = disp;
11787 out->scale = scale;
11788 out->seg = seg;
11790 return retval;
11793 /* Return cost of the memory address x.
11794 For i386, it is better to use a complex address than let gcc copy
11795 the address into a reg and make a new pseudo. But not if the address
11796 requires two regs - that would mean more pseudos with longer
11797 lifetimes. */
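/* Illustrative costs under this scheme: an address built from two
   distinct pseudo registers, e.g. (plus (reg P1) (reg P2)), costs 3;
   a single pseudo plus a displacement costs 2; an address using only
   hard registers keeps the base cost of 1. */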
11798 static int
11799 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11801 struct ix86_address parts;
11802 int cost = 1;
11803 int ok = ix86_decompose_address (x, &parts);
11805 gcc_assert (ok);
11807 if (parts.base && GET_CODE (parts.base) == SUBREG)
11808 parts.base = SUBREG_REG (parts.base);
11809 if (parts.index && GET_CODE (parts.index) == SUBREG)
11810 parts.index = SUBREG_REG (parts.index);
11812 /* Attempt to minimize number of registers in the address. */
11813 if ((parts.base
11814 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11815 || (parts.index
11816 && (!REG_P (parts.index)
11817 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11818 cost++;
11820 if (parts.base
11821 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11822 && parts.index
11823 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11824 && parts.base != parts.index)
11825 cost++;
11827 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11828 since its predecode logic can't detect the length of instructions
11829 and it degenerates to vector decoding. Increase the cost of such
11830 addresses here. The penalty is at least 2 cycles. It may be worthwhile
11831 to split such addresses or even refuse such addresses at all.
11833 Following addressing modes are affected:
11834 [base+scale*index]
11835 [scale*index+disp]
11836 [base+index]
11838 The first and last case may be avoidable by explicitly coding the zero in
11839 the memory address, but I don't have an AMD-K6 machine handy to check this
11840 theory. */
11842 if (TARGET_K6
11843 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11844 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11845 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11846 cost += 10;
11848 return cost;
11851 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11852 this is used to form addresses to local data when -fPIC is in
11853 use. */
11855 static bool
11856 darwin_local_data_pic (rtx disp)
11858 return (GET_CODE (disp) == UNSPEC
11859 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11862 /* Determine if a given RTX is a valid constant. We already know this
11863 satisfies CONSTANT_P. */
11865 static bool
11866 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11868 switch (GET_CODE (x))
11870 case CONST:
11871 x = XEXP (x, 0);
11873 if (GET_CODE (x) == PLUS)
11875 if (!CONST_INT_P (XEXP (x, 1)))
11876 return false;
11877 x = XEXP (x, 0);
11880 if (TARGET_MACHO && darwin_local_data_pic (x))
11881 return true;
11883 /* Only some unspecs are valid as "constants". */
11884 if (GET_CODE (x) == UNSPEC)
11885 switch (XINT (x, 1))
11887 case UNSPEC_GOT:
11888 case UNSPEC_GOTOFF:
11889 case UNSPEC_PLTOFF:
11890 return TARGET_64BIT;
11891 case UNSPEC_TPOFF:
11892 case UNSPEC_NTPOFF:
11893 x = XVECEXP (x, 0, 0);
11894 return (GET_CODE (x) == SYMBOL_REF
11895 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11896 case UNSPEC_DTPOFF:
11897 x = XVECEXP (x, 0, 0);
11898 return (GET_CODE (x) == SYMBOL_REF
11899 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11900 default:
11901 return false;
11904 /* We must have drilled down to a symbol. */
11905 if (GET_CODE (x) == LABEL_REF)
11906 return true;
11907 if (GET_CODE (x) != SYMBOL_REF)
11908 return false;
11909 /* FALLTHRU */
11911 case SYMBOL_REF:
11912 /* TLS symbols are never valid. */
11913 if (SYMBOL_REF_TLS_MODEL (x))
11914 return false;
11916 /* DLLIMPORT symbols are never valid. */
11917 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11918 && SYMBOL_REF_DLLIMPORT_P (x))
11919 return false;
11921 #if TARGET_MACHO
11922 /* mdynamic-no-pic */
11923 if (MACHO_DYNAMIC_NO_PIC_P)
11924 return machopic_symbol_defined_p (x);
11925 #endif
11926 break;
11928 case CONST_DOUBLE:
11929 if (GET_MODE (x) == TImode
11930 && x != CONST0_RTX (TImode)
11931 && !TARGET_64BIT)
11932 return false;
11933 break;
11935 case CONST_VECTOR:
11936 if (!standard_sse_constant_p (x))
11937 return false;
11939 default:
11940 break;
11943 /* Otherwise we handle everything else in the move patterns. */
11944 return true;
11947 /* Determine if it's legal to put X into the constant pool. This
11948 is not possible for the address of thread-local symbols, which
11949 is checked above. */
11951 static bool
11952 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11954 /* We can always put integral constants and vectors in memory. */
11955 switch (GET_CODE (x))
11957 case CONST_INT:
11958 case CONST_DOUBLE:
11959 case CONST_VECTOR:
11960 return false;
11962 default:
11963 break;
11965 return !ix86_legitimate_constant_p (mode, x);
11969 /* Nonzero if the constant value X is a legitimate general operand
11970 when generating PIC code. It is given that flag_pic is on and
11971 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11973 bool
11974 legitimate_pic_operand_p (rtx x)
11976 rtx inner;
11978 switch (GET_CODE (x))
11980 case CONST:
11981 inner = XEXP (x, 0);
11982 if (GET_CODE (inner) == PLUS
11983 && CONST_INT_P (XEXP (inner, 1)))
11984 inner = XEXP (inner, 0);
11986 /* Only some unspecs are valid as "constants". */
11987 if (GET_CODE (inner) == UNSPEC)
11988 switch (XINT (inner, 1))
11990 case UNSPEC_GOT:
11991 case UNSPEC_GOTOFF:
11992 case UNSPEC_PLTOFF:
11993 return TARGET_64BIT;
11994 case UNSPEC_TPOFF:
11995 x = XVECEXP (inner, 0, 0);
11996 return (GET_CODE (x) == SYMBOL_REF
11997 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11998 case UNSPEC_MACHOPIC_OFFSET:
11999 return legitimate_pic_address_disp_p (x);
12000 default:
12001 return false;
12003 /* FALLTHRU */
12005 case SYMBOL_REF:
12006 case LABEL_REF:
12007 return legitimate_pic_address_disp_p (x);
12009 default:
12010 return true;
12014 /* Determine if a given CONST RTX is a valid memory displacement
12015 in PIC mode. */
12017 bool
12018 legitimate_pic_address_disp_p (rtx disp)
12020 bool saw_plus;
12022 /* In 64bit mode we can allow direct addresses of symbols and labels
12023 when they are not dynamic symbols. */
12024 if (TARGET_64BIT)
12026 rtx op0 = disp, op1;
12028 switch (GET_CODE (disp))
12030 case LABEL_REF:
12031 return true;
12033 case CONST:
12034 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12035 break;
12036 op0 = XEXP (XEXP (disp, 0), 0);
12037 op1 = XEXP (XEXP (disp, 0), 1);
12038 if (!CONST_INT_P (op1)
12039 || INTVAL (op1) >= 16*1024*1024
12040 || INTVAL (op1) < -16*1024*1024)
12041 break;
12042 if (GET_CODE (op0) == LABEL_REF)
12043 return true;
12044 if (GET_CODE (op0) != SYMBOL_REF)
12045 break;
12046 /* FALLTHRU */
12048 case SYMBOL_REF:
12049 /* TLS references should always be enclosed in UNSPEC. */
12050 if (SYMBOL_REF_TLS_MODEL (op0))
12051 return false;
12052 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
12053 && ix86_cmodel != CM_LARGE_PIC)
12054 return true;
12055 break;
12057 default:
12058 break;
12061 if (GET_CODE (disp) != CONST)
12062 return false;
12063 disp = XEXP (disp, 0);
12065 if (TARGET_64BIT)
12067 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
12068 of GOT tables. We should not need these anyway. */
12069 if (GET_CODE (disp) != UNSPEC
12070 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12071 && XINT (disp, 1) != UNSPEC_GOTOFF
12072 && XINT (disp, 1) != UNSPEC_PCREL
12073 && XINT (disp, 1) != UNSPEC_PLTOFF))
12074 return false;
12076 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12077 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12078 return false;
12079 return true;
12082 saw_plus = false;
12083 if (GET_CODE (disp) == PLUS)
12085 if (!CONST_INT_P (XEXP (disp, 1)))
12086 return false;
12087 disp = XEXP (disp, 0);
12088 saw_plus = true;
12091 if (TARGET_MACHO && darwin_local_data_pic (disp))
12092 return true;
12094 if (GET_CODE (disp) != UNSPEC)
12095 return false;
12097 switch (XINT (disp, 1))
12099 case UNSPEC_GOT:
12100 if (saw_plus)
12101 return false;
12102 /* We need to check for both symbols and labels because VxWorks loads
12103 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12104 details. */
12105 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12106 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12107 case UNSPEC_GOTOFF:
12108 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12109 While the ABI also specifies a 32bit relocation, we don't produce it in
12110 the small PIC model at all. */
12111 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12112 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12113 && !TARGET_64BIT)
12114 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12115 return false;
12116 case UNSPEC_GOTTPOFF:
12117 case UNSPEC_GOTNTPOFF:
12118 case UNSPEC_INDNTPOFF:
12119 if (saw_plus)
12120 return false;
12121 disp = XVECEXP (disp, 0, 0);
12122 return (GET_CODE (disp) == SYMBOL_REF
12123 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12124 case UNSPEC_NTPOFF:
12125 disp = XVECEXP (disp, 0, 0);
12126 return (GET_CODE (disp) == SYMBOL_REF
12127 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12128 case UNSPEC_DTPOFF:
12129 disp = XVECEXP (disp, 0, 0);
12130 return (GET_CODE (disp) == SYMBOL_REF
12131 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12134 return false;
12137 /* Recognizes RTL expressions that are valid memory addresses for an
12138 instruction. The MODE argument is the machine mode for the MEM
12139 expression that wants to use this address.
12141 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12142 convert common non-canonical forms to canonical form so that they will
12143 be recognized. */
12145 static bool
12146 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12147 rtx addr, bool strict)
12149 struct ix86_address parts;
12150 rtx base, index, disp;
12151 HOST_WIDE_INT scale;
12153 if (ix86_decompose_address (addr, &parts) <= 0)
12154 /* Decomposition failed. */
12155 return false;
12157 base = parts.base;
12158 index = parts.index;
12159 disp = parts.disp;
12160 scale = parts.scale;
12162 /* Validate base register.
12164 Don't allow SUBREG's that span more than a word here. It can lead to spill
12165 failures when the base is one word out of a two word structure, which is
12166 represented internally as a DImode int. */
12168 if (base)
12170 rtx reg;
12172 if (REG_P (base))
12173 reg = base;
12174 else if (GET_CODE (base) == SUBREG
12175 && REG_P (SUBREG_REG (base))
12176 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
12177 <= UNITS_PER_WORD)
12178 reg = SUBREG_REG (base);
12179 else
12180 /* Base is not a register. */
12181 return false;
12183 if (GET_MODE (base) != Pmode)
12184 /* Base is not in Pmode. */
12185 return false;
12187 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12188 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12189 /* Base is not valid. */
12190 return false;
12193 /* Validate index register.
12195 Don't allow SUBREG's that span more than a word here -- same as above. */
12197 if (index)
12199 rtx reg;
12201 if (REG_P (index))
12202 reg = index;
12203 else if (GET_CODE (index) == SUBREG
12204 && REG_P (SUBREG_REG (index))
12205 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
12206 <= UNITS_PER_WORD)
12207 reg = SUBREG_REG (index);
12208 else
12209 /* Index is not a register. */
12210 return false;
12212 if (GET_MODE (index) != Pmode)
12213 /* Index is not in Pmode. */
12214 return false;
12216 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12217 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12218 /* Index is not valid. */
12219 return false;
12222 /* Validate scale factor. */
12223 if (scale != 1)
12225 if (!index)
12226 /* Scale without index. */
12227 return false;
12229 if (scale != 2 && scale != 4 && scale != 8)
12230 /* Scale is not a valid multiplier. */
12231 return false;
12234 /* Validate displacement. */
12235 if (disp)
12237 if (GET_CODE (disp) == CONST
12238 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12239 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12240 switch (XINT (XEXP (disp, 0), 1))
12242 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12243 used. While the ABI also specifies 32bit relocations, we don't produce
12244 them at all and use IP relative instead. */
12245 case UNSPEC_GOT:
12246 case UNSPEC_GOTOFF:
12247 gcc_assert (flag_pic);
12248 if (!TARGET_64BIT)
12249 goto is_legitimate_pic;
12251 /* 64bit address unspec. */
12252 return false;
12254 case UNSPEC_GOTPCREL:
12255 case UNSPEC_PCREL:
12256 gcc_assert (flag_pic);
12257 goto is_legitimate_pic;
12259 case UNSPEC_GOTTPOFF:
12260 case UNSPEC_GOTNTPOFF:
12261 case UNSPEC_INDNTPOFF:
12262 case UNSPEC_NTPOFF:
12263 case UNSPEC_DTPOFF:
12264 break;
12266 case UNSPEC_STACK_CHECK:
12267 gcc_assert (flag_split_stack);
12268 break;
12270 default:
12271 /* Invalid address unspec. */
12272 return false;
12275 else if (SYMBOLIC_CONST (disp)
12276 && (flag_pic
12277 || (TARGET_MACHO
12278 #if TARGET_MACHO
12279 && MACHOPIC_INDIRECT
12280 && !machopic_operand_p (disp)
12281 #endif
12285 is_legitimate_pic:
12286 if (TARGET_64BIT && (index || base))
12288 /* foo@dtpoff(%rX) is ok. */
12289 if (GET_CODE (disp) != CONST
12290 || GET_CODE (XEXP (disp, 0)) != PLUS
12291 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12292 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12293 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12294 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12295 /* Non-constant pic memory reference. */
12296 return false;
12298 else if ((!TARGET_MACHO || flag_pic)
12299 && ! legitimate_pic_address_disp_p (disp))
12300 /* Displacement is an invalid pic construct. */
12301 return false;
12302 #if TARGET_MACHO
12303 else if (MACHO_DYNAMIC_NO_PIC_P
12304 && !ix86_legitimate_constant_p (Pmode, disp))
12305 /* displacement must be referenced via non_lazy_pointer */
12306 return false;
12307 #endif
12309 /* This code used to verify that a symbolic pic displacement
12310 includes the pic_offset_table_rtx register.
12312 While this is a good idea, unfortunately these constructs may
12313 be created by "adds using lea" optimization for incorrect
12314 code like:
12316 int a;
12317 int foo(int i)
12319 return *(&a+i);
12322 This code is nonsensical, but results in addressing the
12323 GOT table with pic_offset_table_rtx as the base. We can't
12324 just refuse it easily, since it gets matched by the
12325 "addsi3" pattern, which later gets split to lea when the
12326 output register differs from the input. While this
12327 could be handled by a separate addsi pattern for this case
12328 that never results in lea, disabling this test seems to be
12329 the easier and correct fix for the crash. */
12331 else if (GET_CODE (disp) != LABEL_REF
12332 && !CONST_INT_P (disp)
12333 && (GET_CODE (disp) != CONST
12334 || !ix86_legitimate_constant_p (Pmode, disp))
12335 && (GET_CODE (disp) != SYMBOL_REF
12336 || !ix86_legitimate_constant_p (Pmode, disp)))
12337 /* Displacement is not constant. */
12338 return false;
12339 else if (TARGET_64BIT
12340 && !x86_64_immediate_operand (disp, VOIDmode))
12341 /* Displacement is out of range. */
12342 return false;
12345 /* Everything looks valid. */
12346 return true;
12349 /* Determine if a given RTX is a valid constant address. */
12351 bool
12352 constant_address_p (rtx x)
12354 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12357 /* Return a unique alias set for the GOT. */
12359 static alias_set_type
12360 ix86_GOT_alias_set (void)
12362 static alias_set_type set = -1;
12363 if (set == -1)
12364 set = new_alias_set ();
12365 return set;
12368 /* Return a legitimate reference for ORIG (an address) using the
12369 register REG. If REG is 0, a new pseudo is generated.
12371 There are two types of references that must be handled:
12373 1. Global data references must load the address from the GOT, via
12374 the PIC reg. An insn is emitted to do this load, and the reg is
12375 returned.
12377 2. Static data references, constant pool addresses, and code labels
12378 compute the address as an offset from the GOT, whose base is in
12379 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12380 differentiate them from global data objects. The returned
12381 address is the PIC reg + an unspec constant.
12383 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12384 reg also appears in the address. */
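/* Illustrative sketches of the two forms in 32-bit PIC mode (the exact
   RTL is built below): a global symbol FOO is loaded through
   (mem (plus pic_reg (const (unspec [FOO] UNSPEC_GOT)))), while a local
   symbol is addressed directly as
   (plus pic_reg (const (unspec [FOO] UNSPEC_GOTOFF))). */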
12386 static rtx
12387 legitimize_pic_address (rtx orig, rtx reg)
12389 rtx addr = orig;
12390 rtx new_rtx = orig;
12391 rtx base;
12393 #if TARGET_MACHO
12394 if (TARGET_MACHO && !TARGET_64BIT)
12396 if (reg == 0)
12397 reg = gen_reg_rtx (Pmode);
12398 /* Use the generic Mach-O PIC machinery. */
12399 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12401 #endif
12403 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12404 new_rtx = addr;
12405 else if (TARGET_64BIT
12406 && ix86_cmodel != CM_SMALL_PIC
12407 && gotoff_operand (addr, Pmode))
12409 rtx tmpreg;
12410 /* This symbol may be referenced via a displacement from the PIC
12411 base address (@GOTOFF). */
12413 if (reload_in_progress)
12414 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12415 if (GET_CODE (addr) == CONST)
12416 addr = XEXP (addr, 0);
12417 if (GET_CODE (addr) == PLUS)
12419 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12420 UNSPEC_GOTOFF);
12421 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12423 else
12424 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12425 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12426 if (!reg)
12427 tmpreg = gen_reg_rtx (Pmode);
12428 else
12429 tmpreg = reg;
12430 emit_move_insn (tmpreg, new_rtx);
12432 if (reg != 0)
12434 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12435 tmpreg, 1, OPTAB_DIRECT);
12436 new_rtx = reg;
12438 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12440 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12442 /* This symbol may be referenced via a displacement from the PIC
12443 base address (@GOTOFF). */
12445 if (reload_in_progress)
12446 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12447 if (GET_CODE (addr) == CONST)
12448 addr = XEXP (addr, 0);
12449 if (GET_CODE (addr) == PLUS)
12451 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12452 UNSPEC_GOTOFF);
12453 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12455 else
12456 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12457 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12458 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12460 if (reg != 0)
12462 emit_move_insn (reg, new_rtx);
12463 new_rtx = reg;
12466 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12467 /* We can't use @GOTOFF for text labels on VxWorks;
12468 see gotoff_operand. */
12469 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12471 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12473 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12474 return legitimize_dllimport_symbol (addr, true);
12475 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12476 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12477 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12479 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12480 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12484 /* For x64 PE-COFF there is no GOT table. So we use the address
12485 directly. */
12486 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12488 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12489 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12491 if (reg == 0)
12492 reg = gen_reg_rtx (Pmode);
12493 emit_move_insn (reg, new_rtx);
12494 new_rtx = reg;
12496 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12498 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12499 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12500 new_rtx = gen_const_mem (Pmode, new_rtx);
12501 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12503 if (reg == 0)
12504 reg = gen_reg_rtx (Pmode);
12505 /* Use gen_movsi directly, otherwise the address is loaded
12506 into a register for CSE. We don't want to CSE these addresses;
12507 instead we CSE addresses from the GOT table, so skip this. */
12508 emit_insn (gen_movsi (reg, new_rtx));
12509 new_rtx = reg;
12511 else
12513 /* This symbol must be referenced via a load from the
12514 Global Offset Table (@GOT). */
12516 if (reload_in_progress)
12517 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12518 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12519 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12520 if (TARGET_64BIT)
12521 new_rtx = force_reg (Pmode, new_rtx);
12522 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12523 new_rtx = gen_const_mem (Pmode, new_rtx);
12524 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12526 if (reg == 0)
12527 reg = gen_reg_rtx (Pmode);
12528 emit_move_insn (reg, new_rtx);
12529 new_rtx = reg;
12532 else
12534 if (CONST_INT_P (addr)
12535 && !x86_64_immediate_operand (addr, VOIDmode))
12537 if (reg)
12539 emit_move_insn (reg, addr);
12540 new_rtx = reg;
12542 else
12543 new_rtx = force_reg (Pmode, addr);
12545 else if (GET_CODE (addr) == CONST)
12547 addr = XEXP (addr, 0);
12549 /* We must match stuff we generate before. Assume the only
12550 unspecs that can get here are ours. Not that we could do
12551 anything with them anyway.... */
12552 if (GET_CODE (addr) == UNSPEC
12553 || (GET_CODE (addr) == PLUS
12554 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12555 return orig;
12556 gcc_assert (GET_CODE (addr) == PLUS);
12558 if (GET_CODE (addr) == PLUS)
12560 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12562 /* Check first to see if this is a constant offset from a @GOTOFF
12563 symbol reference. */
12564 if (gotoff_operand (op0, Pmode)
12565 && CONST_INT_P (op1))
12567 if (!TARGET_64BIT)
12569 if (reload_in_progress)
12570 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12571 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12572 UNSPEC_GOTOFF);
12573 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12574 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12575 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12577 if (reg != 0)
12579 emit_move_insn (reg, new_rtx);
12580 new_rtx = reg;
12583 else
12585 if (INTVAL (op1) < -16*1024*1024
12586 || INTVAL (op1) >= 16*1024*1024)
12588 if (!x86_64_immediate_operand (op1, Pmode))
12589 op1 = force_reg (Pmode, op1);
12590 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12594 else
12596 base = legitimize_pic_address (XEXP (addr, 0), reg);
12597 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12598 base == reg ? NULL_RTX : reg);
12600 if (CONST_INT_P (new_rtx))
12601 new_rtx = plus_constant (base, INTVAL (new_rtx));
12602 else
12604 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12606 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12607 new_rtx = XEXP (new_rtx, 1);
12609 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12614 return new_rtx;
12617 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12619 static rtx
12620 get_thread_pointer (bool to_reg)
12622 rtx tp, reg, insn;
12624 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12625 if (!to_reg)
12626 return tp;
12628 reg = gen_reg_rtx (Pmode);
12629 insn = gen_rtx_SET (VOIDmode, reg, tp);
12630 insn = emit_insn (insn);
12632 return reg;
12635 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12637 static GTY(()) rtx ix86_tls_symbol;
12639 static rtx
12640 ix86_tls_get_addr (void)
12642 if (!ix86_tls_symbol)
12644 const char *sym
12645 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12646 ? "___tls_get_addr" : "__tls_get_addr");
12648 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12651 return ix86_tls_symbol;
12654 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12656 static GTY(()) rtx ix86_tls_module_base_symbol;
12658 static rtx
12659 ix86_tls_module_base (void)
12661 if (!ix86_tls_module_base_symbol)
12663 ix86_tls_module_base_symbol
12664 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12666 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12667 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12670 return ix86_tls_module_base_symbol;
12673 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12674 false if we expect this to be used for a memory address and true if
12675 we expect to load the address into a register. */
12677 static rtx
12678 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12680 rtx dest, base, off;
12681 rtx pic = NULL_RTX, tp = NULL_RTX;
12682 int type;
12684 switch (model)
12686 case TLS_MODEL_GLOBAL_DYNAMIC:
12687 dest = gen_reg_rtx (Pmode);
12689 if (!TARGET_64BIT)
12691 if (flag_pic)
12692 pic = pic_offset_table_rtx;
12693 else
12695 pic = gen_reg_rtx (Pmode);
12696 emit_insn (gen_set_got (pic));
12700 if (TARGET_GNU2_TLS)
12702 if (TARGET_64BIT)
12703 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12704 else
12705 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12707 tp = get_thread_pointer (true);
12708 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12710 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12712 else
12714 rtx caddr = ix86_tls_get_addr ();
12716 if (TARGET_64BIT)
12718 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12720 start_sequence ();
12721 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12722 insns = get_insns ();
12723 end_sequence ();
12725 RTL_CONST_CALL_P (insns) = 1;
12726 emit_libcall_block (insns, dest, rax, x);
12728 else
12729 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12731 break;
12733 case TLS_MODEL_LOCAL_DYNAMIC:
12734 base = gen_reg_rtx (Pmode);
12736 if (!TARGET_64BIT)
12738 if (flag_pic)
12739 pic = pic_offset_table_rtx;
12740 else
12742 pic = gen_reg_rtx (Pmode);
12743 emit_insn (gen_set_got (pic));
12747 if (TARGET_GNU2_TLS)
12749 rtx tmp = ix86_tls_module_base ();
12751 if (TARGET_64BIT)
12752 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12753 else
12754 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12756 tp = get_thread_pointer (true);
12757 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12758 gen_rtx_MINUS (Pmode, tmp, tp));
12760 else
12762 rtx caddr = ix86_tls_get_addr ();
12764 if (TARGET_64BIT)
12766 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12768 start_sequence ();
12769 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12770 insns = get_insns ();
12771 end_sequence ();
12773 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
12774 share the LD_BASE result with other LD model accesses. */
12775 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12776 UNSPEC_TLS_LD_BASE);
12778 RTL_CONST_CALL_P (insns) = 1;
12779 emit_libcall_block (insns, base, rax, eqv);
12781 else
12782 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12785 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12786 off = gen_rtx_CONST (Pmode, off);
12788 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12790 if (TARGET_GNU2_TLS)
12792 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12794 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12796 break;
12798 case TLS_MODEL_INITIAL_EXEC:
12799 if (TARGET_64BIT)
12801 if (TARGET_SUN_TLS)
12803 /* The Sun linker took the AMD64 TLS spec literally
12804 and can only handle %rax as destination of the
12805 initial executable code sequence. */
12807 dest = gen_reg_rtx (Pmode);
12808 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12809 return dest;
12812 pic = NULL;
12813 type = UNSPEC_GOTNTPOFF;
12815 else if (flag_pic)
12817 if (reload_in_progress)
12818 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12819 pic = pic_offset_table_rtx;
12820 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12822 else if (!TARGET_ANY_GNU_TLS)
12824 pic = gen_reg_rtx (Pmode);
12825 emit_insn (gen_set_got (pic));
12826 type = UNSPEC_GOTTPOFF;
12828 else
12830 pic = NULL;
12831 type = UNSPEC_INDNTPOFF;
12834 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12835 off = gen_rtx_CONST (Pmode, off);
12836 if (pic)
12837 off = gen_rtx_PLUS (Pmode, pic, off);
12838 off = gen_const_mem (Pmode, off);
12839 set_mem_alias_set (off, ix86_GOT_alias_set ());
12841 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12843 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12844 off = force_reg (Pmode, off);
12845 return gen_rtx_PLUS (Pmode, base, off);
12847 else
12849 base = get_thread_pointer (true);
12850 dest = gen_reg_rtx (Pmode);
12851 emit_insn (gen_subsi3 (dest, base, off));
12853 break;
12855 case TLS_MODEL_LOCAL_EXEC:
12856 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12857 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12858 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12859 off = gen_rtx_CONST (Pmode, off);
12861 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12863 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12864 return gen_rtx_PLUS (Pmode, base, off);
12866 else
12868 base = get_thread_pointer (true);
12869 dest = gen_reg_rtx (Pmode);
12870 emit_insn (gen_subsi3 (dest, base, off));
12872 break;
12874 default:
12875 gcc_unreachable ();
12878 return dest;
12881 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12882 to symbol DECL. */
12884 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12885 htab_t dllimport_map;
12887 static tree
12888 get_dllimport_decl (tree decl)
12890 struct tree_map *h, in;
12891 void **loc;
12892 const char *name;
12893 const char *prefix;
12894 size_t namelen, prefixlen;
12895 char *imp_name;
12896 tree to;
12897 rtx rtl;
12899 if (!dllimport_map)
12900 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12902 in.hash = htab_hash_pointer (decl);
12903 in.base.from = decl;
12904 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12905 h = (struct tree_map *) *loc;
12906 if (h)
12907 return h->to;
12909 *loc = h = ggc_alloc_tree_map ();
12910 h->hash = in.hash;
12911 h->base.from = decl;
12912 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12913 VAR_DECL, NULL, ptr_type_node);
12914 DECL_ARTIFICIAL (to) = 1;
12915 DECL_IGNORED_P (to) = 1;
12916 DECL_EXTERNAL (to) = 1;
12917 TREE_READONLY (to) = 1;
12919 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12920 name = targetm.strip_name_encoding (name);
12921 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12922 ? "*__imp_" : "*__imp__";
12923 namelen = strlen (name);
12924 prefixlen = strlen (prefix);
12925 imp_name = (char *) alloca (namelen + prefixlen + 1);
12926 memcpy (imp_name, prefix, prefixlen);
12927 memcpy (imp_name + prefixlen, name, namelen + 1);
12929 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12930 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12931 SET_SYMBOL_REF_DECL (rtl, to);
12932 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12934 rtl = gen_const_mem (Pmode, rtl);
12935 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12937 SET_DECL_RTL (to, rtl);
12938 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12940 return to;
12943 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12944 true if we require the result be a register. */
12946 static rtx
12947 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12949 tree imp_decl;
12950 rtx x;
12952 gcc_assert (SYMBOL_REF_DECL (symbol));
12953 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12955 x = DECL_RTL (imp_decl);
12956 if (want_reg)
12957 x = force_reg (Pmode, x);
12958 return x;
12961 /* Try machine-dependent ways of modifying an illegitimate address
12962 to be legitimate. If we find one, return the new, valid address.
12963 This macro is used in only one place: `memory_address' in explow.c.
12965 OLDX is the address as it was before break_out_memory_refs was called.
12966 In some cases it is useful to look at this to decide what needs to be done.
12968 It is always safe for this macro to do nothing. It exists to recognize
12969 opportunities to optimize the output.
12971 For the 80386, we handle X+REG by loading X into a register R and
12972 using R+REG. R will go in a general reg and indexing will be used.
12973 However, if REG is a broken-out memory address or multiplication,
12974 nothing needs to be done because REG can certainly go in a general reg.
12976 When -fpic is used, special handling is needed for symbolic references.
12977 See comments by legitimize_pic_address in i386.c for details. */
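/* For instance, an address such as (plus (reg A) (symbol_ref "x")) that is
   not already legitimate is handled by forcing the symbolic part into a
   fresh pseudo R and returning (plus (reg A) R).  */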
12979 static rtx
12980 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12981 enum machine_mode mode)
12983 int changed = 0;
12984 unsigned log;
12986 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12987 if (log)
12988 return legitimize_tls_address (x, (enum tls_model) log, false);
12989 if (GET_CODE (x) == CONST
12990 && GET_CODE (XEXP (x, 0)) == PLUS
12991 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12992 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12994 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12995 (enum tls_model) log, false);
12996 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12999 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13001 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
13002 return legitimize_dllimport_symbol (x, true);
13003 if (GET_CODE (x) == CONST
13004 && GET_CODE (XEXP (x, 0)) == PLUS
13005 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13006 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
13008 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
13009 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13013 if (flag_pic && SYMBOLIC_CONST (x))
13014 return legitimize_pic_address (x, 0);
13016 #if TARGET_MACHO
13017 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13018 return machopic_indirect_data_reference (x, 0);
13019 #endif
13021 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
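/* E.g. (ashift (reg A) (const_int 3)) becomes (mult (reg A) (const_int 8)),
   which can then be matched as an index register scaled by 8.  */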
13022 if (GET_CODE (x) == ASHIFT
13023 && CONST_INT_P (XEXP (x, 1))
13024 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13026 changed = 1;
13027 log = INTVAL (XEXP (x, 1));
13028 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13029 GEN_INT (1 << log));
13032 if (GET_CODE (x) == PLUS)
13034 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13036 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13037 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13038 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13040 changed = 1;
13041 log = INTVAL (XEXP (XEXP (x, 0), 1));
13042 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13043 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13044 GEN_INT (1 << log));
13047 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13048 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13049 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13051 changed = 1;
13052 log = INTVAL (XEXP (XEXP (x, 1), 1));
13053 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13054 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13055 GEN_INT (1 << log));
13058 /* Put multiply first if it isn't already. */
13059 if (GET_CODE (XEXP (x, 1)) == MULT)
13061 rtx tmp = XEXP (x, 0);
13062 XEXP (x, 0) = XEXP (x, 1);
13063 XEXP (x, 1) = tmp;
13064 changed = 1;
13067 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13068 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13069 created by virtual register instantiation, register elimination, and
13070 similar optimizations. */
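/* For example,
     (plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 8)))
   is rewritten as
     (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8)),
   matching the base + index*scale + disp addressing form.  */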
13071 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13073 changed = 1;
13074 x = gen_rtx_PLUS (Pmode,
13075 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13076 XEXP (XEXP (x, 1), 0)),
13077 XEXP (XEXP (x, 1), 1));
13080 /* Canonicalize
13081 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13082 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13083 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13084 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13085 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13086 && CONSTANT_P (XEXP (x, 1)))
13088 rtx constant;
13089 rtx other = NULL_RTX;
13091 if (CONST_INT_P (XEXP (x, 1)))
13093 constant = XEXP (x, 1);
13094 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13096 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13098 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13099 other = XEXP (x, 1);
13101 else
13102 constant = 0;
13104 if (constant)
13106 changed = 1;
13107 x = gen_rtx_PLUS (Pmode,
13108 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13109 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13110 plus_constant (other, INTVAL (constant)));
13114 if (changed && ix86_legitimate_address_p (mode, x, false))
13115 return x;
13117 if (GET_CODE (XEXP (x, 0)) == MULT)
13119 changed = 1;
13120 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13123 if (GET_CODE (XEXP (x, 1)) == MULT)
13125 changed = 1;
13126 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
13129 if (changed
13130 && REG_P (XEXP (x, 1))
13131 && REG_P (XEXP (x, 0)))
13132 return x;
13134 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13136 changed = 1;
13137 x = legitimize_pic_address (x, 0);
13140 if (changed && ix86_legitimate_address_p (mode, x, false))
13141 return x;
13143 if (REG_P (XEXP (x, 0)))
13145 rtx temp = gen_reg_rtx (Pmode);
13146 rtx val = force_operand (XEXP (x, 1), temp);
13147 if (val != temp)
13148 emit_move_insn (temp, val);
13150 XEXP (x, 1) = temp;
13151 return x;
13154 else if (REG_P (XEXP (x, 1)))
13156 rtx temp = gen_reg_rtx (Pmode);
13157 rtx val = force_operand (XEXP (x, 0), temp);
13158 if (val != temp)
13159 emit_move_insn (temp, val);
13161 XEXP (x, 0) = temp;
13162 return x;
13166 return x;
13169 /* Print an integer constant expression in assembler syntax. Addition
13170 and subtraction are the only arithmetic that may appear in these
13171 expressions. FILE is the stdio stream to write to, X is the rtx, and
13172 CODE is the operand print code from the output string. */
13174 static void
13175 output_pic_addr_const (FILE *file, rtx x, int code)
13177 char buf[256];
13179 switch (GET_CODE (x))
13181 case PC:
13182 gcc_assert (flag_pic);
13183 putc ('.', file);
13184 break;
13186 case SYMBOL_REF:
13187 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13188 output_addr_const (file, x);
13189 else
13191 const char *name = XSTR (x, 0);
13193 /* Mark the decl as referenced so that cgraph will
13194 output the function. */
13195 if (SYMBOL_REF_DECL (x))
13196 mark_decl_referenced (SYMBOL_REF_DECL (x));
13198 #if TARGET_MACHO
13199 if (MACHOPIC_INDIRECT
13200 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13201 name = machopic_indirection_name (x, /*stub_p=*/true);
13202 #endif
13203 assemble_name (file, name);
13205 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
13206 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
13207 fputs ("@PLT", file);
13208 break;
13210 case LABEL_REF:
13211 x = XEXP (x, 0);
13212 /* FALLTHRU */
13213 case CODE_LABEL:
13214 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13215 assemble_name (asm_out_file, buf);
13216 break;
13218 case CONST_INT:
13219 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13220 break;
13222 case CONST:
13223 /* This used to output parentheses around the expression,
13224 but that does not work on the 386 (either ATT or BSD assembler). */
13225 output_pic_addr_const (file, XEXP (x, 0), code);
13226 break;
13228 case CONST_DOUBLE:
13229 if (GET_MODE (x) == VOIDmode)
13231 /* We can use %d if the number is <32 bits and positive. */
13232 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
13233 fprintf (file, "0x%lx%08lx",
13234 (unsigned long) CONST_DOUBLE_HIGH (x),
13235 (unsigned long) CONST_DOUBLE_LOW (x));
13236 else
13237 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
13239 else
13240 /* We can't handle floating point constants;
13241 TARGET_PRINT_OPERAND must handle them. */
13242 output_operand_lossage ("floating constant misused");
13243 break;
13245 case PLUS:
13246 /* Some assemblers need integer constants to appear first. */
13247 if (CONST_INT_P (XEXP (x, 0)))
13249 output_pic_addr_const (file, XEXP (x, 0), code);
13250 putc ('+', file);
13251 output_pic_addr_const (file, XEXP (x, 1), code);
13253 else
13255 gcc_assert (CONST_INT_P (XEXP (x, 1)));
13256 output_pic_addr_const (file, XEXP (x, 1), code);
13257 putc ('+', file);
13258 output_pic_addr_const (file, XEXP (x, 0), code);
13260 break;
13262 case MINUS:
13263 if (!TARGET_MACHO)
13264 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13265 output_pic_addr_const (file, XEXP (x, 0), code);
13266 putc ('-', file);
13267 output_pic_addr_const (file, XEXP (x, 1), code);
13268 if (!TARGET_MACHO)
13269 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13270 break;
13272 case UNSPEC:
13273 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
13275 bool f = i386_asm_output_addr_const_extra (file, x);
13276 gcc_assert (f);
13277 break;
13280 gcc_assert (XVECLEN (x, 0) == 1);
13281 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13282 switch (XINT (x, 1))
13284 case UNSPEC_GOT:
13285 fputs ("@GOT", file);
13286 break;
13287 case UNSPEC_GOTOFF:
13288 fputs ("@GOTOFF", file);
13289 break;
13290 case UNSPEC_PLTOFF:
13291 fputs ("@PLTOFF", file);
13292 break;
13293 case UNSPEC_PCREL:
13294 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13295 "(%rip)" : "[rip]", file);
13296 break;
13297 case UNSPEC_GOTPCREL:
13298 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13299 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13300 break;
13301 case UNSPEC_GOTTPOFF:
13302 /* FIXME: This might be @TPOFF in Sun ld too. */
13303 fputs ("@gottpoff", file);
13304 break;
13305 case UNSPEC_TPOFF:
13306 fputs ("@tpoff", file);
13307 break;
13308 case UNSPEC_NTPOFF:
13309 if (TARGET_64BIT)
13310 fputs ("@tpoff", file);
13311 else
13312 fputs ("@ntpoff", file);
13313 break;
13314 case UNSPEC_DTPOFF:
13315 fputs ("@dtpoff", file);
13316 break;
13317 case UNSPEC_GOTNTPOFF:
13318 if (TARGET_64BIT)
13319 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13320 "@gottpoff(%rip)": "@gottpoff[rip]", file);
13321 else
13322 fputs ("@gotntpoff", file);
13323 break;
13324 case UNSPEC_INDNTPOFF:
13325 fputs ("@indntpoff", file);
13326 break;
13327 #if TARGET_MACHO
13328 case UNSPEC_MACHOPIC_OFFSET:
13329 putc ('-', file);
13330 machopic_output_function_base_name (file);
13331 break;
13332 #endif
13333 default:
13334 output_operand_lossage ("invalid UNSPEC as operand");
13335 break;
13337 break;
13339 default:
13340 output_operand_lossage ("invalid expression as operand");
13344 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13345 We need to emit DTP-relative relocations. */
13347 static void ATTRIBUTE_UNUSED
13348 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13350 fputs (ASM_LONG, file);
13351 output_addr_const (file, x);
13352 fputs ("@dtpoff", file);
13353 switch (size)
13355 case 4:
13356 break;
13357 case 8:
13358 fputs (", 0", file);
13359 break;
13360 default:
13361 gcc_unreachable ();
13365 /* Return true if X is a representation of the PIC register. This copes
13366 with calls from ix86_find_base_term, where the register might have
13367 been replaced by a cselib value. */
13369 static bool
13370 ix86_pic_register_p (rtx x)
13372 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13373 return (pic_offset_table_rtx
13374 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13375 else
13376 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13379 /* Helper function for ix86_delegitimize_address.
13380 Attempt to delegitimize TLS local-exec accesses. */
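/* That is, turn an access such as %fs:x@tpoff (64-bit) or %gs:x@ntpoff
   (32-bit), represented as a segment-override address whose displacement
   wraps an UNSPEC_NTPOFF, back into a reference to the SYMBOL_REF for x
   plus any remaining base, index and constant parts.  */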
13382 static rtx
13383 ix86_delegitimize_tls_address (rtx orig_x)
13385 rtx x = orig_x, unspec;
13386 struct ix86_address addr;
13388 if (!TARGET_TLS_DIRECT_SEG_REFS)
13389 return orig_x;
13390 if (MEM_P (x))
13391 x = XEXP (x, 0);
13392 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13393 return orig_x;
13394 if (ix86_decompose_address (x, &addr) == 0
13395 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13396 || addr.disp == NULL_RTX
13397 || GET_CODE (addr.disp) != CONST)
13398 return orig_x;
13399 unspec = XEXP (addr.disp, 0);
13400 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13401 unspec = XEXP (unspec, 0);
13402 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13403 return orig_x;
13404 x = XVECEXP (unspec, 0, 0);
13405 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13406 if (unspec != XEXP (addr.disp, 0))
13407 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13408 if (addr.index)
13410 rtx idx = addr.index;
13411 if (addr.scale != 1)
13412 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13413 x = gen_rtx_PLUS (Pmode, idx, x);
13415 if (addr.base)
13416 x = gen_rtx_PLUS (Pmode, addr.base, x);
13417 if (MEM_P (orig_x))
13418 x = replace_equiv_address_nv (orig_x, x);
13419 return x;
13422 /* In the name of slightly smaller debug output, and to cater to
13423 general assembler lossage, recognize PIC+GOTOFF and turn it back
13424 into a direct symbol reference.
13426 On Darwin, this is necessary to avoid a crash, because Darwin
13427 has a different PIC label for each routine but the DWARF debugging
13428 information is not associated with any particular routine, so it's
13429 necessary to remove references to the PIC label from RTL stored by
13430 the DWARF output code. */
13432 static rtx
13433 ix86_delegitimize_address (rtx x)
13435 rtx orig_x = delegitimize_mem_from_attrs (x);
13436 /* addend is NULL or some rtx if x is something+GOTOFF where
13437 something doesn't include the PIC register. */
13438 rtx addend = NULL_RTX;
13439 /* reg_addend is NULL or a multiple of some register. */
13440 rtx reg_addend = NULL_RTX;
13441 /* const_addend is NULL or a const_int. */
13442 rtx const_addend = NULL_RTX;
13443 /* This is the result, or NULL. */
13444 rtx result = NULL_RTX;
13446 x = orig_x;
13448 if (MEM_P (x))
13449 x = XEXP (x, 0);
13451 if (TARGET_64BIT)
13453 if (GET_CODE (x) != CONST
13454 || GET_CODE (XEXP (x, 0)) != UNSPEC
13455 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13456 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13457 || !MEM_P (orig_x))
13458 return ix86_delegitimize_tls_address (orig_x);
13459 x = XVECEXP (XEXP (x, 0), 0, 0);
13460 if (GET_MODE (orig_x) != Pmode)
13462 x = simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
13463 if (x == NULL_RTX)
13464 return orig_x;
13466 return x;
13469 if (GET_CODE (x) != PLUS
13470 || GET_CODE (XEXP (x, 1)) != CONST)
13471 return ix86_delegitimize_tls_address (orig_x);
13473 if (ix86_pic_register_p (XEXP (x, 0)))
13474 /* %ebx + GOT/GOTOFF */
13476 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13478 /* %ebx + %reg * scale + GOT/GOTOFF */
13479 reg_addend = XEXP (x, 0);
13480 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13481 reg_addend = XEXP (reg_addend, 1);
13482 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13483 reg_addend = XEXP (reg_addend, 0);
13484 else
13486 reg_addend = NULL_RTX;
13487 addend = XEXP (x, 0);
13490 else
13491 addend = XEXP (x, 0);
13493 x = XEXP (XEXP (x, 1), 0);
13494 if (GET_CODE (x) == PLUS
13495 && CONST_INT_P (XEXP (x, 1)))
13497 const_addend = XEXP (x, 1);
13498 x = XEXP (x, 0);
13501 if (GET_CODE (x) == UNSPEC
13502 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13503 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13504 result = XVECEXP (x, 0, 0);
13506 if (TARGET_MACHO && darwin_local_data_pic (x)
13507 && !MEM_P (orig_x))
13508 result = XVECEXP (x, 0, 0);
13510 if (! result)
13511 return ix86_delegitimize_tls_address (orig_x);
13513 if (const_addend)
13514 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13515 if (reg_addend)
13516 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13517 if (addend)
13519 /* If the rest of original X doesn't involve the PIC register, add
13520 addend and subtract pic_offset_table_rtx. This can happen e.g.
13521 for code like:
13522 leal (%ebx, %ecx, 4), %ecx
13524 movl foo@GOTOFF(%ecx), %edx
13525 in which case we return (%ecx - %ebx) + foo. */
13526 if (pic_offset_table_rtx)
13527 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13528 pic_offset_table_rtx),
13529 result);
13530 else
13531 return orig_x;
13533 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13535 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13536 if (result == NULL_RTX)
13537 return orig_x;
13539 return result;
13542 /* If X is a machine specific address (i.e. a symbol or label being
13543 referenced as a displacement from the GOT implemented using an
13544 UNSPEC), then return the base term. Otherwise return X. */
13547 ix86_find_base_term (rtx x)
13549 rtx term;
13551 if (TARGET_64BIT)
13553 if (GET_CODE (x) != CONST)
13554 return x;
13555 term = XEXP (x, 0);
13556 if (GET_CODE (term) == PLUS
13557 && (CONST_INT_P (XEXP (term, 1))
13558 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13559 term = XEXP (term, 0);
13560 if (GET_CODE (term) != UNSPEC
13561 || (XINT (term, 1) != UNSPEC_GOTPCREL
13562 && XINT (term, 1) != UNSPEC_PCREL))
13563 return x;
13565 return XVECEXP (term, 0, 0);
13568 return ix86_delegitimize_address (x);
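/* Print to FILE the condition-code suffix (e.g. "e", "ne", "g", "b") for
   comparison CODE in flags mode MODE.  REVERSE reverses the condition,
   and FP selects the spellings preferred by fcmov/fcomi (e.g. "nbe"
   instead of "a").  */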
13571 static void
13572 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13573 int fp, FILE *file)
13575 const char *suffix;
13577 if (mode == CCFPmode || mode == CCFPUmode)
13579 code = ix86_fp_compare_code_to_integer (code);
13580 mode = CCmode;
13582 if (reverse)
13583 code = reverse_condition (code);
13585 switch (code)
13587 case EQ:
13588 switch (mode)
13590 case CCAmode:
13591 suffix = "a";
13592 break;
13594 case CCCmode:
13595 suffix = "c";
13596 break;
13598 case CCOmode:
13599 suffix = "o";
13600 break;
13602 case CCSmode:
13603 suffix = "s";
13604 break;
13606 default:
13607 suffix = "e";
13609 break;
13610 case NE:
13611 switch (mode)
13613 case CCAmode:
13614 suffix = "na";
13615 break;
13617 case CCCmode:
13618 suffix = "nc";
13619 break;
13621 case CCOmode:
13622 suffix = "no";
13623 break;
13625 case CCSmode:
13626 suffix = "ns";
13627 break;
13629 default:
13630 suffix = "ne";
13632 break;
13633 case GT:
13634 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13635 suffix = "g";
13636 break;
13637 case GTU:
13638 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13639 Those same assemblers have the same but opposite lossage on cmov. */
13640 if (mode == CCmode)
13641 suffix = fp ? "nbe" : "a";
13642 else if (mode == CCCmode)
13643 suffix = "b";
13644 else
13645 gcc_unreachable ();
13646 break;
13647 case LT:
13648 switch (mode)
13650 case CCNOmode:
13651 case CCGOCmode:
13652 suffix = "s";
13653 break;
13655 case CCmode:
13656 case CCGCmode:
13657 suffix = "l";
13658 break;
13660 default:
13661 gcc_unreachable ();
13663 break;
13664 case LTU:
13665 gcc_assert (mode == CCmode || mode == CCCmode);
13666 suffix = "b";
13667 break;
13668 case GE:
13669 switch (mode)
13671 case CCNOmode:
13672 case CCGOCmode:
13673 suffix = "ns";
13674 break;
13676 case CCmode:
13677 case CCGCmode:
13678 suffix = "ge";
13679 break;
13681 default:
13682 gcc_unreachable ();
13684 break;
13685 case GEU:
13686 /* ??? As above. */
13687 gcc_assert (mode == CCmode || mode == CCCmode);
13688 suffix = fp ? "nb" : "ae";
13689 break;
13690 case LE:
13691 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13692 suffix = "le";
13693 break;
13694 case LEU:
13695 /* ??? As above. */
13696 if (mode == CCmode)
13697 suffix = "be";
13698 else if (mode == CCCmode)
13699 suffix = fp ? "nb" : "ae";
13700 else
13701 gcc_unreachable ();
13702 break;
13703 case UNORDERED:
13704 suffix = fp ? "u" : "p";
13705 break;
13706 case ORDERED:
13707 suffix = fp ? "nu" : "np";
13708 break;
13709 default:
13710 gcc_unreachable ();
13712 fputs (suffix, file);
13715 /* Print the name of register X to FILE based on its machine mode and number.
13716 If CODE is 'w', pretend the mode is HImode.
13717 If CODE is 'b', pretend the mode is QImode.
13718 If CODE is 'k', pretend the mode is SImode.
13719 If CODE is 'q', pretend the mode is DImode.
13720 If CODE is 'x', pretend the mode is V4SFmode.
13721 If CODE is 't', pretend the mode is V8SFmode.
13722 If CODE is 'h', pretend the reg is the 'high' byte register.
13723 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13724 If CODE is 'd', duplicate the operand for AVX instruction.
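/* For example, with CODE 'b' and the ax register this prints "al" ("%al"
   in AT&T syntax); with CODE 'q' on a 64-bit target it prints the 64-bit
   name "rax".  */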
13727 void
13728 print_reg (rtx x, int code, FILE *file)
13730 const char *reg;
13731 bool duplicated = code == 'd' && TARGET_AVX;
13733 gcc_assert (x == pc_rtx
13734 || (REGNO (x) != ARG_POINTER_REGNUM
13735 && REGNO (x) != FRAME_POINTER_REGNUM
13736 && REGNO (x) != FLAGS_REG
13737 && REGNO (x) != FPSR_REG
13738 && REGNO (x) != FPCR_REG));
13740 if (ASSEMBLER_DIALECT == ASM_ATT)
13741 putc ('%', file);
13743 if (x == pc_rtx)
13745 gcc_assert (TARGET_64BIT);
13746 fputs ("rip", file);
13747 return;
13750 if (code == 'w' || MMX_REG_P (x))
13751 code = 2;
13752 else if (code == 'b')
13753 code = 1;
13754 else if (code == 'k')
13755 code = 4;
13756 else if (code == 'q')
13757 code = 8;
13758 else if (code == 'y')
13759 code = 3;
13760 else if (code == 'h')
13761 code = 0;
13762 else if (code == 'x')
13763 code = 16;
13764 else if (code == 't')
13765 code = 32;
13766 else
13767 code = GET_MODE_SIZE (GET_MODE (x));
13769 /* Irritatingly, AMD extended registers use a different naming convention
13770 from the normal registers. */
13771 if (REX_INT_REG_P (x))
13773 gcc_assert (TARGET_64BIT);
13774 switch (code)
13776 case 0:
13777 error ("extended registers have no high halves");
13778 break;
13779 case 1:
13780 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13781 break;
13782 case 2:
13783 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13784 break;
13785 case 4:
13786 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13787 break;
13788 case 8:
13789 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13790 break;
13791 default:
13792 error ("unsupported operand size for extended register");
13793 break;
13795 return;
13798 reg = NULL;
13799 switch (code)
13801 case 3:
13802 if (STACK_TOP_P (x))
13804 reg = "st(0)";
13805 break;
13807 /* FALLTHRU */
13808 case 8:
13809 case 4:
13810 case 12:
13811 if (! ANY_FP_REG_P (x))
13812 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13813 /* FALLTHRU */
13814 case 16:
13815 case 2:
13816 normal:
13817 reg = hi_reg_name[REGNO (x)];
13818 break;
13819 case 1:
13820 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13821 goto normal;
13822 reg = qi_reg_name[REGNO (x)];
13823 break;
13824 case 0:
13825 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13826 goto normal;
13827 reg = qi_high_reg_name[REGNO (x)];
13828 break;
13829 case 32:
13830 if (SSE_REG_P (x))
13832 gcc_assert (!duplicated);
13833 putc ('y', file);
13834 fputs (hi_reg_name[REGNO (x)] + 1, file);
13835 return;
13837 break;
13838 default:
13839 gcc_unreachable ();
13842 fputs (reg, file);
13843 if (duplicated)
13845 if (ASSEMBLER_DIALECT == ASM_ATT)
13846 fprintf (file, ", %%%s", reg);
13847 else
13848 fprintf (file, ", %s", reg);
13852 /* Locate some local-dynamic symbol still in use by this function
13853 so that we can print its name in some tls_local_dynamic_base
13854 pattern. */
13856 static int
13857 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13859 rtx x = *px;
13861 if (GET_CODE (x) == SYMBOL_REF
13862 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13864 cfun->machine->some_ld_name = XSTR (x, 0);
13865 return 1;
13868 return 0;
13871 static const char *
13872 get_some_local_dynamic_name (void)
13874 rtx insn;
13876 if (cfun->machine->some_ld_name)
13877 return cfun->machine->some_ld_name;
13879 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13880 if (NONDEBUG_INSN_P (insn)
13881 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13882 return cfun->machine->some_ld_name;
13884 return NULL;
13887 /* Meaning of CODE:
13888 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13889 C -- print opcode suffix for set/cmov insn.
13890 c -- like C, but print reversed condition
13891 F,f -- likewise, but for floating-point.
13892 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13893 otherwise nothing
13894 R -- print the prefix for register names.
13895 z -- print the opcode suffix for the size of the current operand.
13896 Z -- likewise, with special suffixes for x87 instructions.
13897 * -- print a star (in certain assembler syntax)
13898 A -- print an absolute memory reference.
13899 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13900 s -- print a shift double count, followed by the assembler's argument
13901 delimiter.
13902 b -- print the QImode name of the register for the indicated operand.
13903 %b0 would print %al if operands[0] is reg 0.
13904 w -- likewise, print the HImode name of the register.
13905 k -- likewise, print the SImode name of the register.
13906 q -- likewise, print the DImode name of the register.
13907 x -- likewise, print the V4SFmode name of the register.
13908 t -- likewise, print the V8SFmode name of the register.
13909 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13910 y -- print "st(0)" instead of "st" as a register.
13911 d -- print duplicated register operand for AVX instruction.
13912 D -- print condition for SSE cmp instruction.
13913 P -- if PIC, print an @PLT suffix.
13914 p -- print raw symbol name.
13915 X -- don't print any sort of PIC '@' suffix for a symbol.
13916 & -- print some in-use local-dynamic symbol name.
13917 H -- print a memory address offset by 8; used for sse high-parts
13918 Y -- print condition for XOP pcom* instruction.
13919 + -- print a branch hint as 'cs' or 'ds' prefix
13920 ; -- print a semicolon (after prefixes due to bug in older gas).
13921 @ -- print a segment register of thread base pointer load
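/* So, for instance, a template such as "mov%z0\t{%1, %0|%0, %1}" expands
   to "movl ..." for a 4-byte integer operand in AT&T syntax, while in
   Intel syntax the size suffix is omitted.  */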
13924 void
13925 ix86_print_operand (FILE *file, rtx x, int code)
13927 if (code)
13929 switch (code)
13931 case '*':
13932 if (ASSEMBLER_DIALECT == ASM_ATT)
13933 putc ('*', file);
13934 return;
13936 case '&':
13938 const char *name = get_some_local_dynamic_name ();
13939 if (name == NULL)
13940 output_operand_lossage ("'%%&' used without any "
13941 "local dynamic TLS references");
13942 else
13943 assemble_name (file, name);
13944 return;
13947 case 'A':
13948 switch (ASSEMBLER_DIALECT)
13950 case ASM_ATT:
13951 putc ('*', file);
13952 break;
13954 case ASM_INTEL:
13955 /* Intel syntax. For absolute addresses, registers should not
13956 be surrounded by brackets. */
13957 if (!REG_P (x))
13959 putc ('[', file);
13960 ix86_print_operand (file, x, 0);
13961 putc (']', file);
13962 return;
13964 break;
13966 default:
13967 gcc_unreachable ();
13970 ix86_print_operand (file, x, 0);
13971 return;
13974 case 'L':
13975 if (ASSEMBLER_DIALECT == ASM_ATT)
13976 putc ('l', file);
13977 return;
13979 case 'W':
13980 if (ASSEMBLER_DIALECT == ASM_ATT)
13981 putc ('w', file);
13982 return;
13984 case 'B':
13985 if (ASSEMBLER_DIALECT == ASM_ATT)
13986 putc ('b', file);
13987 return;
13989 case 'Q':
13990 if (ASSEMBLER_DIALECT == ASM_ATT)
13991 putc ('l', file);
13992 return;
13994 case 'S':
13995 if (ASSEMBLER_DIALECT == ASM_ATT)
13996 putc ('s', file);
13997 return;
13999 case 'T':
14000 if (ASSEMBLER_DIALECT == ASM_ATT)
14001 putc ('t', file);
14002 return;
14004 case 'z':
14005 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14007 /* Opcodes don't get size suffixes if using Intel opcodes. */
14008 if (ASSEMBLER_DIALECT == ASM_INTEL)
14009 return;
14011 switch (GET_MODE_SIZE (GET_MODE (x)))
14013 case 1:
14014 putc ('b', file);
14015 return;
14017 case 2:
14018 putc ('w', file);
14019 return;
14021 case 4:
14022 putc ('l', file);
14023 return;
14025 case 8:
14026 putc ('q', file);
14027 return;
14029 default:
14030 output_operand_lossage
14031 ("invalid operand size for operand code '%c'", code);
14032 return;
14036 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14037 warning
14038 (0, "non-integer operand used with operand code '%c'", code);
14039 /* FALLTHRU */
14041 case 'Z':
14042 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14043 if (ASSEMBLER_DIALECT == ASM_INTEL)
14044 return;
14046 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14048 switch (GET_MODE_SIZE (GET_MODE (x)))
14050 case 2:
14051 #ifdef HAVE_AS_IX86_FILDS
14052 putc ('s', file);
14053 #endif
14054 return;
14056 case 4:
14057 putc ('l', file);
14058 return;
14060 case 8:
14061 #ifdef HAVE_AS_IX86_FILDQ
14062 putc ('q', file);
14063 #else
14064 fputs ("ll", file);
14065 #endif
14066 return;
14068 default:
14069 break;
14072 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14074 /* 387 opcodes don't get size suffixes
14075 if the operands are registers. */
14076 if (STACK_REG_P (x))
14077 return;
14079 switch (GET_MODE_SIZE (GET_MODE (x)))
14081 case 4:
14082 putc ('s', file);
14083 return;
14085 case 8:
14086 putc ('l', file);
14087 return;
14089 case 12:
14090 case 16:
14091 putc ('t', file);
14092 return;
14094 default:
14095 break;
14098 else
14100 output_operand_lossage
14101 ("invalid operand type used with operand code '%c'", code);
14102 return;
14105 output_operand_lossage
14106 ("invalid operand size for operand code '%c'", code);
14107 return;
14109 case 'd':
14110 case 'b':
14111 case 'w':
14112 case 'k':
14113 case 'q':
14114 case 'h':
14115 case 't':
14116 case 'y':
14117 case 'x':
14118 case 'X':
14119 case 'P':
14120 case 'p':
14121 break;
14123 case 's':
14124 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14126 ix86_print_operand (file, x, 0);
14127 fputs (", ", file);
14129 return;
14131 case 'D':
14132 /* Little bit of braindamage here. The SSE compare instructions
14133 use completely different names for the comparisons than the
14134 fp conditional moves do. */
14135 if (TARGET_AVX)
14137 switch (GET_CODE (x))
14139 case EQ:
14140 fputs ("eq", file);
14141 break;
14142 case UNEQ:
14143 fputs ("eq_us", file);
14144 break;
14145 case LT:
14146 fputs ("lt", file);
14147 break;
14148 case UNLT:
14149 fputs ("nge", file);
14150 break;
14151 case LE:
14152 fputs ("le", file);
14153 break;
14154 case UNLE:
14155 fputs ("ngt", file);
14156 break;
14157 case UNORDERED:
14158 fputs ("unord", file);
14159 break;
14160 case NE:
14161 fputs ("neq", file);
14162 break;
14163 case LTGT:
14164 fputs ("neq_oq", file);
14165 break;
14166 case GE:
14167 fputs ("ge", file);
14168 break;
14169 case UNGE:
14170 fputs ("nlt", file);
14171 break;
14172 case GT:
14173 fputs ("gt", file);
14174 break;
14175 case UNGT:
14176 fputs ("nle", file);
14177 break;
14178 case ORDERED:
14179 fputs ("ord", file);
14180 break;
14181 default:
14182 output_operand_lossage ("operand is not a condition code, "
14183 "invalid operand code 'D'");
14184 return;
14187 else
14189 switch (GET_CODE (x))
14191 case EQ:
14192 case UNEQ:
14193 fputs ("eq", file);
14194 break;
14195 case LT:
14196 case UNLT:
14197 fputs ("lt", file);
14198 break;
14199 case LE:
14200 case UNLE:
14201 fputs ("le", file);
14202 break;
14203 case UNORDERED:
14204 fputs ("unord", file);
14205 break;
14206 case NE:
14207 case LTGT:
14208 fputs ("neq", file);
14209 break;
14210 case UNGE:
14211 case GE:
14212 fputs ("nlt", file);
14213 break;
14214 case UNGT:
14215 case GT:
14216 fputs ("nle", file);
14217 break;
14218 case ORDERED:
14219 fputs ("ord", file);
14220 break;
14221 default:
14222 output_operand_lossage ("operand is not a condition code, "
14223 "invalid operand code 'D'");
14224 return;
14227 return;
14228 case 'O':
14229 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14230 if (ASSEMBLER_DIALECT == ASM_ATT)
14232 switch (GET_MODE (x))
14234 case HImode: putc ('w', file); break;
14235 case SImode:
14236 case SFmode: putc ('l', file); break;
14237 case DImode:
14238 case DFmode: putc ('q', file); break;
14239 default: gcc_unreachable ();
14241 putc ('.', file);
14243 #endif
14244 return;
14245 case 'C':
14246 if (!COMPARISON_P (x))
14248 output_operand_lossage ("operand is neither a constant nor a "
14249 "condition code, invalid operand code "
14250 "'C'");
14251 return;
14253 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
14254 return;
14255 case 'F':
14256 if (!COMPARISON_P (x))
14258 output_operand_lossage ("operand is neither a constant nor a "
14259 "condition code, invalid operand code "
14260 "'F'");
14261 return;
14263 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14264 if (ASSEMBLER_DIALECT == ASM_ATT)
14265 putc ('.', file);
14266 #endif
14267 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
14268 return;
14270 /* Like above, but reverse condition */
14271 case 'c':
14272 /* Check to see if argument to %c is really a constant
14273 and not a condition code which needs to be reversed. */
14274 if (!COMPARISON_P (x))
14276 output_operand_lossage ("operand is neither a constant nor a "
14277 "condition code, invalid operand "
14278 "code 'c'");
14279 return;
14281 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
14282 return;
14283 case 'f':
14284 if (!COMPARISON_P (x))
14286 output_operand_lossage ("operand is neither a constant nor a "
14287 "condition code, invalid operand "
14288 "code 'f'");
14289 return;
14291 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14292 if (ASSEMBLER_DIALECT == ASM_ATT)
14293 putc ('.', file);
14294 #endif
14295 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
14296 return;
14298 case 'H':
14299 /* It doesn't actually matter what mode we use here, as we're
14300 only going to use this for printing. */
14301 x = adjust_address_nv (x, DImode, 8);
14302 break;
14304 case '+':
14306 rtx x;
14308 if (!optimize
14309 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
14310 return;
14312 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14313 if (x)
14315 int pred_val = INTVAL (XEXP (x, 0));
14317 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14318 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14320 int taken = pred_val > REG_BR_PROB_BASE / 2;
14321 int cputaken = final_forward_branch_p (current_output_insn) == 0;
14323 /* Emit hints only when the default branch prediction
14324 heuristics would fail. */
14325 if (taken != cputaken)
14327 /* We use 3e (DS) prefix for taken branches and
14328 2e (CS) prefix for not taken branches. */
14329 if (taken)
14330 fputs ("ds ; ", file);
14331 else
14332 fputs ("cs ; ", file);
14336 return;
14339 case 'Y':
14340 switch (GET_CODE (x))
14342 case NE:
14343 fputs ("neq", file);
14344 break;
14345 case EQ:
14346 fputs ("eq", file);
14347 break;
14348 case GE:
14349 case GEU:
14350 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14351 break;
14352 case GT:
14353 case GTU:
14354 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14355 break;
14356 case LE:
14357 case LEU:
14358 fputs ("le", file);
14359 break;
14360 case LT:
14361 case LTU:
14362 fputs ("lt", file);
14363 break;
14364 case UNORDERED:
14365 fputs ("unord", file);
14366 break;
14367 case ORDERED:
14368 fputs ("ord", file);
14369 break;
14370 case UNEQ:
14371 fputs ("ueq", file);
14372 break;
14373 case UNGE:
14374 fputs ("nlt", file);
14375 break;
14376 case UNGT:
14377 fputs ("nle", file);
14378 break;
14379 case UNLE:
14380 fputs ("ule", file);
14381 break;
14382 case UNLT:
14383 fputs ("ult", file);
14384 break;
14385 case LTGT:
14386 fputs ("une", file);
14387 break;
14388 default:
14389 output_operand_lossage ("operand is not a condition code, "
14390 "invalid operand code 'Y'");
14391 return;
14393 return;
14395 case ';':
14396 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14397 putc (';', file);
14398 #endif
14399 return;
14401 case '@':
14402 if (ASSEMBLER_DIALECT == ASM_ATT)
14403 putc ('%', file);
14405 /* The kernel uses a different segment register for performance
14406 reasons; a system call would not have to trash the userspace
14407 segment register, which would be expensive. */
14408 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14409 fputs ("fs", file);
14410 else
14411 fputs ("gs", file);
14412 return;
14414 default:
14415 output_operand_lossage ("invalid operand code '%c'", code);
14419 if (REG_P (x))
14420 print_reg (x, code, file);
14422 else if (MEM_P (x))
14424 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14425 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14426 && GET_MODE (x) != BLKmode)
14428 const char * size;
14429 switch (GET_MODE_SIZE (GET_MODE (x)))
14431 case 1: size = "BYTE"; break;
14432 case 2: size = "WORD"; break;
14433 case 4: size = "DWORD"; break;
14434 case 8: size = "QWORD"; break;
14435 case 12: size = "TBYTE"; break;
14436 case 16:
14437 if (GET_MODE (x) == XFmode)
14438 size = "TBYTE";
14439 else
14440 size = "XMMWORD";
14441 break;
14442 case 32: size = "YMMWORD"; break;
14443 default:
14444 gcc_unreachable ();
14447 /* Check for explicit size override (codes 'b', 'w' and 'k'). */
14448 if (code == 'b')
14449 size = "BYTE";
14450 else if (code == 'w')
14451 size = "WORD";
14452 else if (code == 'k')
14453 size = "DWORD";
14455 fputs (size, file);
14456 fputs (" PTR ", file);
14459 x = XEXP (x, 0);
14460 /* Avoid (%rip) for call operands. */
14461 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14462 && !CONST_INT_P (x))
14463 output_addr_const (file, x);
14464 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14465 output_operand_lossage ("invalid constraints for operand");
14466 else
14467 output_address (x);
14470 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14472 REAL_VALUE_TYPE r;
14473 long l;
14475 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14476 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14478 if (ASSEMBLER_DIALECT == ASM_ATT)
14479 putc ('$', file);
14480 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14481 if (code == 'q')
14482 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14483 else
14484 fprintf (file, "0x%08x", (unsigned int) l);
14487 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14489 REAL_VALUE_TYPE r;
14490 long l[2];
14492 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14493 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14495 if (ASSEMBLER_DIALECT == ASM_ATT)
14496 putc ('$', file);
14497 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14500 /* These float cases don't actually occur as immediate operands. */
14501 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14503 char dstr[30];
14505 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14506 fputs (dstr, file);
14509 else
14511 /* We have patterns that allow zero sets of memory, for instance.
14512 In 64-bit mode, we should probably support all 8-byte vectors,
14513 since we can in fact encode that into an immediate. */
14514 if (GET_CODE (x) == CONST_VECTOR)
14516 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14517 x = const0_rtx;
14520 if (code != 'P' && code != 'p')
14522 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14524 if (ASSEMBLER_DIALECT == ASM_ATT)
14525 putc ('$', file);
14527 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14528 || GET_CODE (x) == LABEL_REF)
14530 if (ASSEMBLER_DIALECT == ASM_ATT)
14531 putc ('$', file);
14532 else
14533 fputs ("OFFSET FLAT:", file);
14536 if (CONST_INT_P (x))
14537 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14538 else if (flag_pic || MACHOPIC_INDIRECT)
14539 output_pic_addr_const (file, x, code);
14540 else
14541 output_addr_const (file, x);
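/* Implementation of TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */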
14545 static bool
14546 ix86_print_operand_punct_valid_p (unsigned char code)
14548 return (code == '@' || code == '*' || code == '+'
14549 || code == '&' || code == ';');
14552 /* Print a memory operand whose address is ADDR. */
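/* The result is "disp(%base,%index,scale)" in AT&T syntax and a bracketed
   form such as "[base+disp+index*scale]" in Intel syntax, with a segment
   override like "%fs:"/"fs:" prepended when one is present.  */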
14554 static void
14555 ix86_print_operand_address (FILE *file, rtx addr)
14557 struct ix86_address parts;
14558 rtx base, index, disp;
14559 int scale;
14560 int ok = ix86_decompose_address (addr, &parts);
14562 gcc_assert (ok);
14564 base = parts.base;
14565 index = parts.index;
14566 disp = parts.disp;
14567 scale = parts.scale;
14569 switch (parts.seg)
14571 case SEG_DEFAULT:
14572 break;
14573 case SEG_FS:
14574 case SEG_GS:
14575 if (ASSEMBLER_DIALECT == ASM_ATT)
14576 putc ('%', file);
14577 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14578 break;
14579 default:
14580 gcc_unreachable ();
14583 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14584 if (TARGET_64BIT && !base && !index)
14586 rtx symbol = disp;
14588 if (GET_CODE (disp) == CONST
14589 && GET_CODE (XEXP (disp, 0)) == PLUS
14590 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14591 symbol = XEXP (XEXP (disp, 0), 0);
14593 if (GET_CODE (symbol) == LABEL_REF
14594 || (GET_CODE (symbol) == SYMBOL_REF
14595 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14596 base = pc_rtx;
14598 if (!base && !index)
14600 /* A displacement-only address requires special attention. */
14602 if (CONST_INT_P (disp))
14604 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14605 fputs ("ds:", file);
14606 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14608 else if (flag_pic)
14609 output_pic_addr_const (file, disp, 0);
14610 else
14611 output_addr_const (file, disp);
14613 else
14615 if (ASSEMBLER_DIALECT == ASM_ATT)
14617 if (disp)
14619 if (flag_pic)
14620 output_pic_addr_const (file, disp, 0);
14621 else if (GET_CODE (disp) == LABEL_REF)
14622 output_asm_label (disp);
14623 else
14624 output_addr_const (file, disp);
14627 putc ('(', file);
14628 if (base)
14629 print_reg (base, 0, file);
14630 if (index)
14632 putc (',', file);
14633 print_reg (index, 0, file);
14634 if (scale != 1)
14635 fprintf (file, ",%d", scale);
14637 putc (')', file);
14639 else
14641 rtx offset = NULL_RTX;
14643 if (disp)
14645 /* Pull out the offset of a symbol; print any symbol itself. */
14646 if (GET_CODE (disp) == CONST
14647 && GET_CODE (XEXP (disp, 0)) == PLUS
14648 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14650 offset = XEXP (XEXP (disp, 0), 1);
14651 disp = gen_rtx_CONST (VOIDmode,
14652 XEXP (XEXP (disp, 0), 0));
14655 if (flag_pic)
14656 output_pic_addr_const (file, disp, 0);
14657 else if (GET_CODE (disp) == LABEL_REF)
14658 output_asm_label (disp);
14659 else if (CONST_INT_P (disp))
14660 offset = disp;
14661 else
14662 output_addr_const (file, disp);
14665 putc ('[', file);
14666 if (base)
14668 print_reg (base, 0, file);
14669 if (offset)
14671 if (INTVAL (offset) >= 0)
14672 putc ('+', file);
14673 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14676 else if (offset)
14677 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14678 else
14679 putc ('0', file);
14681 if (index)
14683 putc ('+', file);
14684 print_reg (index, 0, file);
14685 if (scale != 1)
14686 fprintf (file, "*%d", scale);
14688 putc (']', file);
14693 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14695 static bool
14696 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14698 rtx op;
14700 if (GET_CODE (x) != UNSPEC)
14701 return false;
14703 op = XVECEXP (x, 0, 0);
14704 switch (XINT (x, 1))
14706 case UNSPEC_GOTTPOFF:
14707 output_addr_const (file, op);
14708 /* FIXME: This might be @TPOFF in Sun ld. */
14709 fputs ("@gottpoff", file);
14710 break;
14711 case UNSPEC_TPOFF:
14712 output_addr_const (file, op);
14713 fputs ("@tpoff", file);
14714 break;
14715 case UNSPEC_NTPOFF:
14716 output_addr_const (file, op);
14717 if (TARGET_64BIT)
14718 fputs ("@tpoff", file);
14719 else
14720 fputs ("@ntpoff", file);
14721 break;
14722 case UNSPEC_DTPOFF:
14723 output_addr_const (file, op);
14724 fputs ("@dtpoff", file);
14725 break;
14726 case UNSPEC_GOTNTPOFF:
14727 output_addr_const (file, op);
14728 if (TARGET_64BIT)
14729 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14730 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14731 else
14732 fputs ("@gotntpoff", file);
14733 break;
14734 case UNSPEC_INDNTPOFF:
14735 output_addr_const (file, op);
14736 fputs ("@indntpoff", file);
14737 break;
14738 #if TARGET_MACHO
14739 case UNSPEC_MACHOPIC_OFFSET:
14740 output_addr_const (file, op);
14741 putc ('-', file);
14742 machopic_output_function_base_name (file);
14743 break;
14744 #endif
14746 case UNSPEC_STACK_CHECK:
14748 int offset;
14750 gcc_assert (flag_split_stack);
14752 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14753 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14754 #else
14755 gcc_unreachable ();
14756 #endif
14758 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14760 break;
14762 default:
14763 return false;
14766 return true;
14769 /* Split one or more double-mode RTL references into pairs of half-mode
14770 references. The RTL can be REG, offsettable MEM, integer constant, or
14771 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14772 split and "num" is its length. lo_half and hi_half are output arrays
14773 that parallel "operands". */
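/* For a DImode operand the halves are SImode: a MEM is split into the same
   address at byte offsets 0 and 4, while REGs and constants are split with
   simplify_gen_subreg.  Likewise TImode splits into DImode halves at
   offsets 0 and 8.  */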
14775 void
14776 split_double_mode (enum machine_mode mode, rtx operands[],
14777 int num, rtx lo_half[], rtx hi_half[])
14779 enum machine_mode half_mode;
14780 unsigned int byte;
14782 switch (mode)
14784 case TImode:
14785 half_mode = DImode;
14786 break;
14787 case DImode:
14788 half_mode = SImode;
14789 break;
14790 default:
14791 gcc_unreachable ();
14794 byte = GET_MODE_SIZE (half_mode);
14796 while (num--)
14798 rtx op = operands[num];
14800 /* simplify_subreg refuses to split volatile memory addresses,
14801 but we still have to handle them. */
14802 if (MEM_P (op))
14804 lo_half[num] = adjust_address (op, half_mode, 0);
14805 hi_half[num] = adjust_address (op, half_mode, byte);
14807 else
14809 lo_half[num] = simplify_gen_subreg (half_mode, op,
14810 GET_MODE (op) == VOIDmode
14811 ? mode : GET_MODE (op), 0);
14812 hi_half[num] = simplify_gen_subreg (half_mode, op,
14813 GET_MODE (op) == VOIDmode
14814 ? mode : GET_MODE (op), byte);
14819 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14820 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14821 is the expression of the binary operation. The output may either be
14822 emitted here, or returned to the caller, like all output_* functions.
14824 There is no guarantee that the operands are the same mode, as they
14825 might be within FLOAT or FLOAT_EXTEND expressions. */
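/* For example, a scalar SFmode SSE add yields the template
   "addss\t{%2, %0|%0, %2}", or the three-operand
   "vaddss\t{%2, %1, %0|%0, %1, %2}" when AVX is enabled; the x87 cases
   below choose between the fadd/fsub/fmul/fdiv forms and their popping
   and reversed variants.  */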
14827 #ifndef SYSV386_COMPAT
14828 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14829 wants to fix the assemblers because that causes incompatibility
14830 with gcc. No-one wants to fix gcc because that causes
14831 incompatibility with assemblers... You can use the option of
14832 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14833 #define SYSV386_COMPAT 1
14834 #endif
14836 const char *
14837 output_387_binary_op (rtx insn, rtx *operands)
14839 static char buf[40];
14840 const char *p;
14841 const char *ssep;
14842 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14844 #ifdef ENABLE_CHECKING
14845 /* Even if we do not want to check the inputs, this documents input
14846 constraints, which helps in understanding the following code. */
14847 if (STACK_REG_P (operands[0])
14848 && ((REG_P (operands[1])
14849 && REGNO (operands[0]) == REGNO (operands[1])
14850 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14851 || (REG_P (operands[2])
14852 && REGNO (operands[0]) == REGNO (operands[2])
14853 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14854 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14855 ; /* ok */
14856 else
14857 gcc_assert (is_sse);
14858 #endif
14860 switch (GET_CODE (operands[3]))
14862 case PLUS:
14863 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14864 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14865 p = "fiadd";
14866 else
14867 p = "fadd";
14868 ssep = "vadd";
14869 break;
14871 case MINUS:
14872 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14873 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14874 p = "fisub";
14875 else
14876 p = "fsub";
14877 ssep = "vsub";
14878 break;
14880 case MULT:
14881 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14882 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14883 p = "fimul";
14884 else
14885 p = "fmul";
14886 ssep = "vmul";
14887 break;
14889 case DIV:
14890 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14891 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14892 p = "fidiv";
14893 else
14894 p = "fdiv";
14895 ssep = "vdiv";
14896 break;
14898 default:
14899 gcc_unreachable ();
14902 if (is_sse)
14904 if (TARGET_AVX)
14906 strcpy (buf, ssep);
14907 if (GET_MODE (operands[0]) == SFmode)
14908 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14909 else
14910 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14912 else
14914 strcpy (buf, ssep + 1);
14915 if (GET_MODE (operands[0]) == SFmode)
14916 strcat (buf, "ss\t{%2, %0|%0, %2}");
14917 else
14918 strcat (buf, "sd\t{%2, %0|%0, %2}");
14920 return buf;
14922 strcpy (buf, p);
14924 switch (GET_CODE (operands[3]))
14926 case MULT:
14927 case PLUS:
14928 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14930 rtx temp = operands[2];
14931 operands[2] = operands[1];
14932 operands[1] = temp;
14935 /* Now we know operands[0] == operands[1]. */
14937 if (MEM_P (operands[2]))
14939 p = "%Z2\t%2";
14940 break;
14943 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14945 if (STACK_TOP_P (operands[0]))
14946 /* How is it that we are storing to a dead operand[2]?
14947 Well, presumably operands[1] is dead too. We can't
14948 store the result to st(0) as st(0) gets popped on this
14949 instruction. Instead store to operands[2] (which I
14950 think has to be st(1)). st(1) will be popped later.
14951 gcc <= 2.8.1 didn't have this check and generated
14952 assembly code that the Unixware assembler rejected. */
14953 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14954 else
14955 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14956 break;
14959 if (STACK_TOP_P (operands[0]))
14960 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14961 else
14962 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14963 break;
14965 case MINUS:
14966 case DIV:
14967 if (MEM_P (operands[1]))
14969 p = "r%Z1\t%1";
14970 break;
14973 if (MEM_P (operands[2]))
14975 p = "%Z2\t%2";
14976 break;
14979 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14981 #if SYSV386_COMPAT
14982 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14983 derived assemblers, confusingly reverse the direction of
14984 the operation for fsub{r} and fdiv{r} when the
14985 destination register is not st(0). The Intel assembler
14986 doesn't have this brain damage. Read !SYSV386_COMPAT to
14987 figure out what the hardware really does. */
14988 if (STACK_TOP_P (operands[0]))
14989 p = "{p\t%0, %2|rp\t%2, %0}";
14990 else
14991 p = "{rp\t%2, %0|p\t%0, %2}";
14992 #else
14993 if (STACK_TOP_P (operands[0]))
14994 /* As above for fmul/fadd, we can't store to st(0). */
14995 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14996 else
14997 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14998 #endif
14999 break;
15002 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15004 #if SYSV386_COMPAT
15005 if (STACK_TOP_P (operands[0]))
15006 p = "{rp\t%0, %1|p\t%1, %0}";
15007 else
15008 p = "{p\t%1, %0|rp\t%0, %1}";
15009 #else
15010 if (STACK_TOP_P (operands[0]))
15011 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15012 else
15013 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15014 #endif
15015 break;
15018 if (STACK_TOP_P (operands[0]))
15020 if (STACK_TOP_P (operands[1]))
15021 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15022 else
15023 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15024 break;
15026 else if (STACK_TOP_P (operands[1]))
15028 #if SYSV386_COMPAT
15029 p = "{\t%1, %0|r\t%0, %1}";
15030 #else
15031 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15032 #endif
15034 else
15036 #if SYSV386_COMPAT
15037 p = "{r\t%2, %0|\t%0, %2}";
15038 #else
15039 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15040 #endif
15042 break;
15044 default:
15045 gcc_unreachable ();
15048 strcat (buf, p);
15049 return buf;
15052 /* Return needed mode for entity in optimize_mode_switching pass. */
15055 ix86_mode_needed (int entity, rtx insn)
15057 enum attr_i387_cw mode;
15059 /* The mode UNINITIALIZED is used to store the control word after a
15060 function call or ASM pattern. The mode ANY specifies that the function
15061 has no requirements on the control word and makes no changes to the
15062 bits we are interested in. */
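/* For example, a call insn yields I387_CW_UNINITIALIZED for every entity,
   while an insn whose i387_cw attribute is "trunc" yields I387_CW_TRUNC
   for the I387_TRUNC entity and I387_CW_ANY for the other entities.  */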
15064 if (CALL_P (insn)
15065 || (NONJUMP_INSN_P (insn)
15066 && (asm_noperands (PATTERN (insn)) >= 0
15067 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15068 return I387_CW_UNINITIALIZED;
15070 if (recog_memoized (insn) < 0)
15071 return I387_CW_ANY;
15073 mode = get_attr_i387_cw (insn);
15075 switch (entity)
15077 case I387_TRUNC:
15078 if (mode == I387_CW_TRUNC)
15079 return mode;
15080 break;
15082 case I387_FLOOR:
15083 if (mode == I387_CW_FLOOR)
15084 return mode;
15085 break;
15087 case I387_CEIL:
15088 if (mode == I387_CW_CEIL)
15089 return mode;
15090 break;
15092 case I387_MASK_PM:
15093 if (mode == I387_CW_MASK_PM)
15094 return mode;
15095 break;
15097 default:
15098 gcc_unreachable ();
15101 return I387_CW_ANY;
15104 /* Output code to initialize control word copies used by trunc?f?i and
15105 rounding patterns. CURRENT_MODE is set to current control word,
15106 while NEW_MODE is set to new control word. */
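/* The bits of interest are the rounding-control field (mask 0x0c00:
   0x0c00 rounds toward zero, 0x0400 toward -infinity, 0x0800 toward
   +infinity) and the precision-exception mask bit 0x0020 used for
   nearbyint.  */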
15108 void
15109 emit_i387_cw_initialization (int mode)
15111 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15112 rtx new_mode;
15114 enum ix86_stack_slot slot;
15116 rtx reg = gen_reg_rtx (HImode);
15118 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15119 emit_move_insn (reg, copy_rtx (stored_mode));
15121 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
15122 || optimize_function_for_size_p (cfun))
15124 switch (mode)
15126 case I387_CW_TRUNC:
15127 /* round toward zero (truncate) */
15128 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15129 slot = SLOT_CW_TRUNC;
15130 break;
15132 case I387_CW_FLOOR:
15133 /* round down toward -oo */
15134 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15135 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15136 slot = SLOT_CW_FLOOR;
15137 break;
15139 case I387_CW_CEIL:
15140 /* round up toward +oo */
15141 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15142 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15143 slot = SLOT_CW_CEIL;
15144 break;
15146 case I387_CW_MASK_PM:
15147 /* mask precision exception for nearbyint() */
15148 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15149 slot = SLOT_CW_MASK_PM;
15150 break;
15152 default:
15153 gcc_unreachable ();
15156 else
15158 switch (mode)
15160 case I387_CW_TRUNC:
15161 /* round toward zero (truncate) */
15162 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
15163 slot = SLOT_CW_TRUNC;
15164 break;
15166 case I387_CW_FLOOR:
15167 /* round down toward -oo */
15168 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
15169 slot = SLOT_CW_FLOOR;
15170 break;
15172 case I387_CW_CEIL:
15173 /* round up toward +oo */
15174 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
15175 slot = SLOT_CW_CEIL;
15176 break;
15178 case I387_CW_MASK_PM:
15179 /* mask precision exception for nearbyint() */
15180 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15181 slot = SLOT_CW_MASK_PM;
15182 break;
15184 default:
15185 gcc_unreachable ();
15189 gcc_assert (slot < MAX_386_STACK_LOCALS);
15191 new_mode = assign_386_stack_local (HImode, slot);
15192 emit_move_insn (new_mode, reg);
15195 /* Output code for INSN to convert a float to a signed int. OPERANDS
15196 are the insn operands. The output may be [HSD]Imode and the input
15197 operand may be [SDX]Fmode. */
15199 const char *
15200 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
15202 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15203 int dimode_p = GET_MODE (operands[0]) == DImode;
15204 int round_mode = get_attr_i387_cw (insn);
15206 /* Jump through a hoop or two for DImode, since the hardware has no
15207 non-popping instruction. We used to do this a different way, but
15208 that was somewhat fragile and broke with post-reload splitters. */
15209 if ((dimode_p || fisttp) && !stack_top_dies)
15210 output_asm_insn ("fld\t%y1", operands);
15212 gcc_assert (STACK_TOP_P (operands[1]));
15213 gcc_assert (MEM_P (operands[0]));
15214 gcc_assert (GET_MODE (operands[1]) != TFmode);
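/* The typical sequence below is "fldcw %3; fistp %0; fldcw %2": switch
   the control word to the truncating mode, store the integer, then
   restore the caller's control word.  With SSE3's fisttp the truncating
   store needs no control-word changes at all.  */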
15216 if (fisttp)
15217 output_asm_insn ("fisttp%Z0\t%0", operands);
15218 else
15220 if (round_mode != I387_CW_ANY)
15221 output_asm_insn ("fldcw\t%3", operands);
15222 if (stack_top_dies || dimode_p)
15223 output_asm_insn ("fistp%Z0\t%0", operands);
15224 else
15225 output_asm_insn ("fist%Z0\t%0", operands);
15226 if (round_mode != I387_CW_ANY)
15227 output_asm_insn ("fldcw\t%2", operands);
15230 return "";
15233 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15234 have the values zero or one, indicates the ffreep insn's operand
15235 from the OPERANDS array. */
15237 static const char *
15238 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15240 if (TARGET_USE_FFREEP)
15241 #ifdef HAVE_AS_IX86_FFREEP
15242 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15243 #else
15245 static char retval[32];
15246 int regno = REGNO (operands[opno]);
15248 gcc_assert (FP_REGNO_P (regno));
15250 regno -= FIRST_STACK_REG;
15252 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
15253 return retval;
15255 #endif
15257 return opno ? "fstp\t%y1" : "fstp\t%y0";
15261 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15262 should be used. UNORDERED_P is true when fucom should be used. */
15264 const char *
15265 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
15267 int stack_top_dies;
15268 rtx cmp_op0, cmp_op1;
15269 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
15271 if (eflags_p)
15273 cmp_op0 = operands[0];
15274 cmp_op1 = operands[1];
15276 else
15278 cmp_op0 = operands[1];
15279 cmp_op1 = operands[2];
15282 if (is_sse)
15284 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
15285 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
15286 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
15287 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
15289 if (GET_MODE (operands[0]) == SFmode)
15290 if (unordered_p)
15291 return &ucomiss[TARGET_AVX ? 0 : 1];
15292 else
15293 return &comiss[TARGET_AVX ? 0 : 1];
15294 else
15295 if (unordered_p)
15296 return &ucomisd[TARGET_AVX ? 0 : 1];
15297 else
15298 return &comisd[TARGET_AVX ? 0 : 1];
15301 gcc_assert (STACK_TOP_P (cmp_op0));
15303 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15305 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
15307 if (stack_top_dies)
15309 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15310 return output_387_ffreep (operands, 1);
15312 else
15313 return "ftst\n\tfnstsw\t%0";
15316 if (STACK_REG_P (cmp_op1)
15317 && stack_top_dies
15318 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15319 && REGNO (cmp_op1) != FIRST_STACK_REG)
15321 /* If the top of the 387 stack dies, and the other operand
15322 is also a stack register that dies, then this must be a
15323 `fcompp' float compare. */
15325 if (eflags_p)
15327 /* There is no double popping fcomi variant. Fortunately,
15328 eflags is immune from the fstp's cc clobbering. */
15329 if (unordered_p)
15330 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15331 else
15332 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15333 return output_387_ffreep (operands, 0);
15335 else
15337 if (unordered_p)
15338 return "fucompp\n\tfnstsw\t%0";
15339 else
15340 return "fcompp\n\tfnstsw\t%0";
15343 else
15345 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15347 static const char * const alt[16] =
15349 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15350 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15351 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15352 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15354 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15355 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15356 NULL,
15357 NULL,
15359 "fcomi\t{%y1, %0|%0, %y1}",
15360 "fcomip\t{%y1, %0|%0, %y1}",
15361 "fucomi\t{%y1, %0|%0, %y1}",
15362 "fucomip\t{%y1, %0|%0, %y1}",
15364 NULL,
15365 NULL,
15366 NULL,
15367 NULL
15370 int mask;
15371 const char *ret;
15373 mask = eflags_p << 3;
15374 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15375 mask |= unordered_p << 1;
15376 mask |= stack_top_dies;
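/* For instance, an fcomip with a dying stack top encodes as
   eflags_p = 1, intmode = 0, unordered_p = 0, stack_top_dies = 1,
   i.e. mask = 0b1001 = 9, which selects "fcomip" in the table above.  */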
15378 gcc_assert (mask < 16);
15379 ret = alt[mask];
15380 gcc_assert (ret);
15382 return ret;
15386 void
15387 ix86_output_addr_vec_elt (FILE *file, int value)
15389 const char *directive = ASM_LONG;
15391 #ifdef ASM_QUAD
15392 if (TARGET_64BIT)
15393 directive = ASM_QUAD;
15394 #else
15395 gcc_assert (!TARGET_64BIT);
15396 #endif
15398 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15401 void
15402 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15404 const char *directive = ASM_LONG;
15406 #ifdef ASM_QUAD
15407 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15408 directive = ASM_QUAD;
15409 #else
15410 gcc_assert (!TARGET_64BIT);
15411 #endif
15412 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15413 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15414 fprintf (file, "%s%s%d-%s%d\n",
15415 directive, LPREFIX, value, LPREFIX, rel);
15416 else if (HAVE_AS_GOTOFF_IN_DATA)
15417 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15418 #if TARGET_MACHO
15419 else if (TARGET_MACHO)
15421 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15422 machopic_output_function_base_name (file);
15423 putc ('\n', file);
15425 #endif
15426 else
15427 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15428 GOT_SYMBOL_NAME, LPREFIX, value);
15431 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15432 for the target. */
15434 void
15435 ix86_expand_clear (rtx dest)
15437 rtx tmp;
15439 /* We play register width games, which are only valid after reload. */
15440 gcc_assert (reload_completed);
15442 /* Avoid HImode and its attendant prefix byte. */
15443 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15444 dest = gen_rtx_REG (SImode, REGNO (dest));
15445 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15447 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15448 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15450 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15451 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
15454 emit_insn (tmp);
15457 /* X is an unchanging MEM. If it is a constant pool reference, return
15458 the constant pool rtx, else NULL. */
15461 maybe_get_pool_constant (rtx x)
15463 x = ix86_delegitimize_address (XEXP (x, 0));
15465 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15466 return get_pool_constant (x);
15468 return NULL_RTX;
15471 void
15472 ix86_expand_move (enum machine_mode mode, rtx operands[])
15474 rtx op0, op1;
15475 enum tls_model model;
15477 op0 = operands[0];
15478 op1 = operands[1];
15480 if (GET_CODE (op1) == SYMBOL_REF)
15482 model = SYMBOL_REF_TLS_MODEL (op1);
15483 if (model)
15485 op1 = legitimize_tls_address (op1, model, true);
15486 op1 = force_operand (op1, op0);
15487 if (op1 == op0)
15488 return;
15490 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15491 && SYMBOL_REF_DLLIMPORT_P (op1))
15492 op1 = legitimize_dllimport_symbol (op1, false);
15494 else if (GET_CODE (op1) == CONST
15495 && GET_CODE (XEXP (op1, 0)) == PLUS
15496 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15498 rtx addend = XEXP (XEXP (op1, 0), 1);
15499 rtx symbol = XEXP (XEXP (op1, 0), 0);
15500 rtx tmp = NULL;
15502 model = SYMBOL_REF_TLS_MODEL (symbol);
15503 if (model)
15504 tmp = legitimize_tls_address (symbol, model, true);
15505 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15506 && SYMBOL_REF_DLLIMPORT_P (symbol))
15507 tmp = legitimize_dllimport_symbol (symbol, true);
15509 if (tmp)
15511 tmp = force_operand (tmp, NULL);
15512 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15513 op0, 1, OPTAB_DIRECT);
15514 if (tmp == op0)
15515 return;
15519 if ((flag_pic || MACHOPIC_INDIRECT)
15520 && mode == Pmode && symbolic_operand (op1, Pmode))
15522 if (TARGET_MACHO && !TARGET_64BIT)
15524 #if TARGET_MACHO
15525 /* dynamic-no-pic */
15526 if (MACHOPIC_INDIRECT)
15528 rtx temp = ((reload_in_progress
15529 || ((op0 && REG_P (op0))
15530 && mode == Pmode))
15531 ? op0 : gen_reg_rtx (Pmode));
15532 op1 = machopic_indirect_data_reference (op1, temp);
15533 if (MACHOPIC_PURE)
15534 op1 = machopic_legitimize_pic_address (op1, mode,
15535 temp == op1 ? 0 : temp);
15537 if (op0 != op1 && GET_CODE (op0) != MEM)
15539 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15540 emit_insn (insn);
15541 return;
15543 if (GET_CODE (op0) == MEM)
15544 op1 = force_reg (Pmode, op1);
15545 else
15547 rtx temp = op0;
15548 if (GET_CODE (temp) != REG)
15549 temp = gen_reg_rtx (Pmode);
15550 temp = legitimize_pic_address (op1, temp);
15551 if (temp == op0)
15552 return;
15553 op1 = temp;
15555 /* dynamic-no-pic */
15556 #endif
15558 else
15560 if (MEM_P (op0))
15561 op1 = force_reg (Pmode, op1);
15562 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
15564 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15565 op1 = legitimize_pic_address (op1, reg);
15566 if (op0 == op1)
15567 return;
15571 else
15573 if (MEM_P (op0)
15574 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15575 || !push_operand (op0, mode))
15576 && MEM_P (op1))
15577 op1 = force_reg (mode, op1);
15579 if (push_operand (op0, mode)
15580 && ! general_no_elim_operand (op1, mode))
15581 op1 = copy_to_mode_reg (mode, op1);
15583 /* Force large constants in 64bit compilation into a register
15584 so that they get CSEed. */
15585 if (can_create_pseudo_p ()
15586 && (mode == DImode) && TARGET_64BIT
15587 && immediate_operand (op1, mode)
15588 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15589 && !register_operand (op0, mode)
15590 && optimize)
15591 op1 = copy_to_mode_reg (mode, op1);
15593 if (can_create_pseudo_p ()
15594 && FLOAT_MODE_P (mode)
15595 && GET_CODE (op1) == CONST_DOUBLE)
15597 /* If we are loading a floating point constant to a register,
15598 force the value to memory now, since we'll get better code
15599 out of the back end. */
15601 op1 = validize_mem (force_const_mem (mode, op1));
15602 if (!register_operand (op0, mode))
15604 rtx temp = gen_reg_rtx (mode);
15605 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15606 emit_move_insn (op0, temp);
15607 return;
15612 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15615 void
15616 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15618 rtx op0 = operands[0], op1 = operands[1];
15619 unsigned int align = GET_MODE_ALIGNMENT (mode);
15621 /* Force constants other than zero into memory. We do not know how
15622 the instructions used to build constants modify the upper 64 bits
15623 of the register; once we have that information we may be able
15624 to handle some of them more efficiently. */
15625 if (can_create_pseudo_p ()
15626 && register_operand (op0, mode)
15627 && (CONSTANT_P (op1)
15628 || (GET_CODE (op1) == SUBREG
15629 && CONSTANT_P (SUBREG_REG (op1))))
15630 && !standard_sse_constant_p (op1))
15631 op1 = validize_mem (force_const_mem (mode, op1));
15633 /* We need to check memory alignment for SSE modes since attributes
15634 can make operands unaligned. */
15635 if (can_create_pseudo_p ()
15636 && SSE_REG_MODE_P (mode)
15637 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15638 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15640 rtx tmp[2];
15642 /* ix86_expand_vector_move_misalign() does not like constants ... */
15643 if (CONSTANT_P (op1)
15644 || (GET_CODE (op1) == SUBREG
15645 && CONSTANT_P (SUBREG_REG (op1))))
15646 op1 = validize_mem (force_const_mem (mode, op1));
15648 /* ... nor both arguments in memory. */
15649 if (!register_operand (op0, mode)
15650 && !register_operand (op1, mode))
15651 op1 = force_reg (mode, op1);
15653 tmp[0] = op0; tmp[1] = op1;
15654 ix86_expand_vector_move_misalign (mode, tmp);
15655 return;
15658 /* Make operand1 a register if it isn't already. */
15659 if (can_create_pseudo_p ()
15660 && !register_operand (op0, mode)
15661 && !register_operand (op1, mode))
15663 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15664 return;
15667 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15670 /* Split 32-byte AVX unaligned load and store if needed. */
15672 static void
15673 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15675 rtx m;
15676 rtx (*extract) (rtx, rtx, rtx);
15677 rtx (*move_unaligned) (rtx, rtx);
15678 enum machine_mode mode;
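/* Roughly, when the split tunings are enabled a 32-byte unaligned load
   becomes something like
       vmovups (mem), %xmm0
       vinsertf128 $1, 16(mem), %ymm0, %ymm0
   and a split store becomes two vextractf128 stores; otherwise a single
   vmovups/vmovupd/vmovdqu on the full 32 bytes is used.  */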
15680 switch (GET_MODE (op0))
15682 default:
15683 gcc_unreachable ();
15684 case V32QImode:
15685 extract = gen_avx_vextractf128v32qi;
15686 move_unaligned = gen_avx_movdqu256;
15687 mode = V16QImode;
15688 break;
15689 case V8SFmode:
15690 extract = gen_avx_vextractf128v8sf;
15691 move_unaligned = gen_avx_movups256;
15692 mode = V4SFmode;
15693 break;
15694 case V4DFmode:
15695 extract = gen_avx_vextractf128v4df;
15696 move_unaligned = gen_avx_movupd256;
15697 mode = V2DFmode;
15698 break;
15701 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15703 rtx r = gen_reg_rtx (mode);
15704 m = adjust_address (op1, mode, 0);
15705 emit_move_insn (r, m);
15706 m = adjust_address (op1, mode, 16);
15707 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15708 emit_move_insn (op0, r);
15710 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15712 m = adjust_address (op0, mode, 0);
15713 emit_insn (extract (m, op1, const0_rtx));
15714 m = adjust_address (op0, mode, 16);
15715 emit_insn (extract (m, op1, const1_rtx));
15717 else
15718 emit_insn (move_unaligned (op0, op1));
15721 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15722 straight to ix86_expand_vector_move. */
15723 /* Code generation for scalar reg-reg moves of single and double precision data:
15724 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15725 movaps reg, reg
15726 else
15727 movss reg, reg
15728 if (x86_sse_partial_reg_dependency == true)
15729 movapd reg, reg
15730 else
15731 movsd reg, reg
15733 Code generation for scalar loads of double precision data:
15734 if (x86_sse_split_regs == true)
15735 movlpd mem, reg (gas syntax)
15736 else
15737 movsd mem, reg
15739 Code generation for unaligned packed loads of single precision data
15740 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15741 if (x86_sse_unaligned_move_optimal)
15742 movups mem, reg
15744 if (x86_sse_partial_reg_dependency == true)
15746 xorps reg, reg
15747 movlps mem, reg
15748 movhps mem+8, reg
15750 else
15752 movlps mem, reg
15753 movhps mem+8, reg
15756 Code generation for unaligned packed loads of double precision data
15757 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15758 if (x86_sse_unaligned_move_optimal)
15759 movupd mem, reg
15761 if (x86_sse_split_regs == true)
15763 movlpd mem, reg
15764 movhpd mem+8, reg
15766 else
15768 movsd mem, reg
15769 movhpd mem+8, reg
*/
15773 void
15774 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15776 rtx op0, op1, m;
15778 op0 = operands[0];
15779 op1 = operands[1];
15781 if (TARGET_AVX)
15783 switch (GET_MODE_CLASS (mode))
15785 case MODE_VECTOR_INT:
15786 case MODE_INT:
15787 switch (GET_MODE_SIZE (mode))
15789 case 16:
15790 /* If we're optimizing for size, movups is the smallest. */
15791 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15793 op0 = gen_lowpart (V4SFmode, op0);
15794 op1 = gen_lowpart (V4SFmode, op1);
15795 emit_insn (gen_sse_movups (op0, op1));
15796 return;
15798 op0 = gen_lowpart (V16QImode, op0);
15799 op1 = gen_lowpart (V16QImode, op1);
15800 emit_insn (gen_sse2_movdqu (op0, op1));
15801 break;
15802 case 32:
15803 op0 = gen_lowpart (V32QImode, op0);
15804 op1 = gen_lowpart (V32QImode, op1);
15805 ix86_avx256_split_vector_move_misalign (op0, op1);
15806 break;
15807 default:
15808 gcc_unreachable ();
15810 break;
15811 case MODE_VECTOR_FLOAT:
15812 op0 = gen_lowpart (mode, op0);
15813 op1 = gen_lowpart (mode, op1);
15815 switch (mode)
15817 case V4SFmode:
15818 emit_insn (gen_sse_movups (op0, op1));
15819 break;
15820 case V8SFmode:
15821 ix86_avx256_split_vector_move_misalign (op0, op1);
15822 break;
15823 case V2DFmode:
15824 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15826 op0 = gen_lowpart (V4SFmode, op0);
15827 op1 = gen_lowpart (V4SFmode, op1);
15828 emit_insn (gen_sse_movups (op0, op1));
15829 return;
15831 emit_insn (gen_sse2_movupd (op0, op1));
15832 break;
15833 case V4DFmode:
15834 ix86_avx256_split_vector_move_misalign (op0, op1);
15835 break;
15836 default:
15837 gcc_unreachable ();
15839 break;
15841 default:
15842 gcc_unreachable ();
15845 return;
15848 if (MEM_P (op1))
15850 /* If we're optimizing for size, movups is the smallest. */
15851 if (optimize_insn_for_size_p ()
15852 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15854 op0 = gen_lowpart (V4SFmode, op0);
15855 op1 = gen_lowpart (V4SFmode, op1);
15856 emit_insn (gen_sse_movups (op0, op1));
15857 return;
15860 /* ??? If we have typed data, then it would appear that using
15861 movdqu is the only way to get unaligned data loaded with
15862 integer type. */
15863 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15865 op0 = gen_lowpart (V16QImode, op0);
15866 op1 = gen_lowpart (V16QImode, op1);
15867 emit_insn (gen_sse2_movdqu (op0, op1));
15868 return;
15871 if (TARGET_SSE2 && mode == V2DFmode)
15873 rtx zero;
15875 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15877 op0 = gen_lowpart (V2DFmode, op0);
15878 op1 = gen_lowpart (V2DFmode, op1);
15879 emit_insn (gen_sse2_movupd (op0, op1));
15880 return;
15883 /* When SSE registers are split into halves, we can avoid
15884 writing to the top half twice. */
15885 if (TARGET_SSE_SPLIT_REGS)
15887 emit_clobber (op0);
15888 zero = op0;
15890 else
15892 /* ??? Not sure about the best option for the Intel chips.
15893 The following would seem to satisfy; the register is
15894 entirely cleared, breaking the dependency chain. We
15895 then store to the upper half, with a dependency depth
15896 of one. A rumor has it that Intel recommends two movsd
15897 followed by an unpacklpd, but this is unconfirmed. And
15898 given that the dependency depth of the unpacklpd would
15899 still be one, I'm not sure why this would be better. */
15900 zero = CONST0_RTX (V2DFmode);
15903 m = adjust_address (op1, DFmode, 0);
15904 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15905 m = adjust_address (op1, DFmode, 8);
15906 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15908 else
15910 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15912 op0 = gen_lowpart (V4SFmode, op0);
15913 op1 = gen_lowpart (V4SFmode, op1);
15914 emit_insn (gen_sse_movups (op0, op1));
15915 return;
15918 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15919 emit_move_insn (op0, CONST0_RTX (mode));
15920 else
15921 emit_clobber (op0);
15923 if (mode != V4SFmode)
15924 op0 = gen_lowpart (V4SFmode, op0);
15925 m = adjust_address (op1, V2SFmode, 0);
15926 emit_insn (gen_sse_loadlps (op0, op0, m));
15927 m = adjust_address (op1, V2SFmode, 8);
15928 emit_insn (gen_sse_loadhps (op0, op0, m));
15931 else if (MEM_P (op0))
15933 /* If we're optimizing for size, movups is the smallest. */
15934 if (optimize_insn_for_size_p ()
15935 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15937 op0 = gen_lowpart (V4SFmode, op0);
15938 op1 = gen_lowpart (V4SFmode, op1);
15939 emit_insn (gen_sse_movups (op0, op1));
15940 return;
15943 /* ??? Similar to above, only less clear because of quote
15944 typeless stores unquote. */
15945 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15946 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15948 op0 = gen_lowpart (V16QImode, op0);
15949 op1 = gen_lowpart (V16QImode, op1);
15950 emit_insn (gen_sse2_movdqu (op0, op1));
15951 return;
15954 if (TARGET_SSE2 && mode == V2DFmode)
15956 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15958 op0 = gen_lowpart (V2DFmode, op0);
15959 op1 = gen_lowpart (V2DFmode, op1);
15960 emit_insn (gen_sse2_movupd (op0, op1));
15962 else
15964 m = adjust_address (op0, DFmode, 0);
15965 emit_insn (gen_sse2_storelpd (m, op1));
15966 m = adjust_address (op0, DFmode, 8);
15967 emit_insn (gen_sse2_storehpd (m, op1));
15970 else
15972 if (mode != V4SFmode)
15973 op1 = gen_lowpart (V4SFmode, op1);
15975 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15977 op0 = gen_lowpart (V4SFmode, op0);
15978 emit_insn (gen_sse_movups (op0, op1));
15980 else
15982 m = adjust_address (op0, V2SFmode, 0);
15983 emit_insn (gen_sse_storelps (m, op1));
15984 m = adjust_address (op0, V2SFmode, 8);
15985 emit_insn (gen_sse_storehps (m, op1));
15989 else
15990 gcc_unreachable ();
15993 /* Expand a push in MODE. This is some mode for which we do not support
15994 proper push instructions, at least from the registers that we expect
15995 the value to live in. */
15997 void
15998 ix86_expand_push (enum machine_mode mode, rtx x)
16000 rtx tmp;
16002 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
16003 GEN_INT (-GET_MODE_SIZE (mode)),
16004 stack_pointer_rtx, 1, OPTAB_DIRECT);
16005 if (tmp != stack_pointer_rtx)
16006 emit_move_insn (stack_pointer_rtx, tmp);
16008 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
16010 /* When we push an operand onto the stack, it has to be aligned at least
16011 at the function argument boundary. However, since we don't have
16012 the argument type, we can't determine the actual argument
16013 boundary. */
16014 emit_move_insn (tmp, x);
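/* In effect the expansion above is "sub $size, %sp" followed by a plain
   store to (%sp), since there is no real push instruction for this
   mode.  */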
16017 /* Helper function of ix86_fixup_binary_operands to canonicalize
16018 operand order. Returns true if the operands should be swapped. */
16020 static bool
16021 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
16022 rtx operands[])
16024 rtx dst = operands[0];
16025 rtx src1 = operands[1];
16026 rtx src2 = operands[2];
16028 /* If the operation is not commutative, we can't do anything. */
16029 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
16030 return false;
16032 /* Highest priority is that src1 should match dst. */
16033 if (rtx_equal_p (dst, src1))
16034 return false;
16035 if (rtx_equal_p (dst, src2))
16036 return true;
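/* E.g. for "a = b + a" the operands are swapped to give "a = a + b",
   so that src1 matches the destination as the two-address integer
   instructions require.  */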
16038 /* Next highest priority is that immediate constants come second. */
16039 if (immediate_operand (src2, mode))
16040 return false;
16041 if (immediate_operand (src1, mode))
16042 return true;
16044 /* Lowest priority is that memory references should come second. */
16045 if (MEM_P (src2))
16046 return false;
16047 if (MEM_P (src1))
16048 return true;
16050 return false;
16054 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16055 destination to use for the operation. If different from the true
16056 destination in operands[0], a copy operation will be required. */
16059 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
16060 rtx operands[])
16062 rtx dst = operands[0];
16063 rtx src1 = operands[1];
16064 rtx src2 = operands[2];
16066 /* Canonicalize operand order. */
16067 if (ix86_swap_binary_operands_p (code, mode, operands))
16069 rtx temp;
16071 /* It is invalid to swap operands of different modes. */
16072 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
16074 temp = src1;
16075 src1 = src2;
16076 src2 = temp;
16079 /* Both source operands cannot be in memory. */
16080 if (MEM_P (src1) && MEM_P (src2))
16082 /* Optimization: Only read from memory once. */
16083 if (rtx_equal_p (src1, src2))
16085 src2 = force_reg (mode, src2);
16086 src1 = src2;
16088 else
16089 src2 = force_reg (mode, src2);
16092 /* If the destination is memory, and we do not have matching source
16093 operands, do things in registers. */
16094 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16095 dst = gen_reg_rtx (mode);
16097 /* Source 1 cannot be a constant. */
16098 if (CONSTANT_P (src1))
16099 src1 = force_reg (mode, src1);
16101 /* Source 1 cannot be a non-matching memory. */
16102 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16103 src1 = force_reg (mode, src1);
16105 operands[1] = src1;
16106 operands[2] = src2;
16107 return dst;
16110 /* Similarly, but assume that the destination has already been
16111 set up properly. */
16113 void
16114 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
16115 enum machine_mode mode, rtx operands[])
16117 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
16118 gcc_assert (dst == operands[0]);
16121 /* Attempt to expand a binary operator. Make the expansion closer to the
16122 actual machine than just general_operand, which will allow 3 separate
16123 memory references (one output, two input) in a single insn. */
16125 void
16126 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
16127 rtx operands[])
16129 rtx src1, src2, dst, op, clob;
16131 dst = ix86_fixup_binary_operands (code, mode, operands);
16132 src1 = operands[1];
16133 src2 = operands[2];
16135 /* Emit the instruction. */
16137 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
16138 if (reload_in_progress)
16140 /* Reload doesn't know about the flags register, and doesn't know that
16141 it doesn't want to clobber it. We can only do this with PLUS. */
16142 gcc_assert (code == PLUS);
16143 emit_insn (op);
16145 else if (reload_completed
16146 && code == PLUS
16147 && !rtx_equal_p (dst, src1))
16149 /* This is going to be an LEA; avoid splitting it later. */
16150 emit_insn (op);
16152 else
16154 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16155 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16158 /* Fix up the destination if needed. */
16159 if (dst != operands[0])
16160 emit_move_insn (operands[0], dst);
16163 /* Return TRUE or FALSE depending on whether the binary operator meets the
16164 appropriate constraints. */
16166 bool
16167 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
16168 rtx operands[3])
16170 rtx dst = operands[0];
16171 rtx src1 = operands[1];
16172 rtx src2 = operands[2];
16174 /* Both source operands cannot be in memory. */
16175 if (MEM_P (src1) && MEM_P (src2))
16176 return false;
16178 /* Canonicalize operand order for commutative operators. */
16179 if (ix86_swap_binary_operands_p (code, mode, operands))
16181 rtx temp = src1;
16182 src1 = src2;
16183 src2 = temp;
16186 /* If the destination is memory, we must have a matching source operand. */
16187 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16188 return false;
16190 /* Source 1 cannot be a constant. */
16191 if (CONSTANT_P (src1))
16192 return false;
16194 /* Source 1 cannot be a non-matching memory. */
16195 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16197 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16198 return (code == AND
16199 && (mode == HImode
16200 || mode == SImode
16201 || (TARGET_64BIT && mode == DImode))
16202 && CONST_INT_P (src2)
16203 && (INTVAL (src2) == 0xff
16204 || INTVAL (src2) == 0xffff));
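/* Such an AND of a non-matching memory source can still be emitted,
   as a movz{bl,wl} zero-extending load rather than an and proper.  */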
16207 return true;
16210 /* Attempt to expand a unary operator. Make the expansion closer to the
16211 actual machine than just general_operand, which will allow 2 separate
16212 memory references (one output, one input) in a single insn. */
16214 void
16215 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
16216 rtx operands[])
16218 int matching_memory;
16219 rtx src, dst, op, clob;
16221 dst = operands[0];
16222 src = operands[1];
16224 /* If the destination is memory, and we do not have matching source
16225 operands, do things in registers. */
16226 matching_memory = 0;
16227 if (MEM_P (dst))
16229 if (rtx_equal_p (dst, src))
16230 matching_memory = 1;
16231 else
16232 dst = gen_reg_rtx (mode);
16235 /* When source operand is memory, destination must match. */
16236 if (MEM_P (src) && !matching_memory)
16237 src = force_reg (mode, src);
16239 /* Emit the instruction. */
16241 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
16242 if (reload_in_progress || code == NOT)
16244 /* Reload doesn't know about the flags register, and doesn't know that
16245 it doesn't want to clobber it. */
16246 gcc_assert (code == NOT);
16247 emit_insn (op);
16249 else
16251 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16252 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16255 /* Fix up the destination if needed. */
16256 if (dst != operands[0])
16257 emit_move_insn (operands[0], dst);
16260 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16261 divisor are within the range [0-255]. */
16263 void
16264 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
16265 bool signed_p)
16267 rtx end_label, qimode_label;
16268 rtx insn, div, mod;
16269 rtx scratch, tmp0, tmp1, tmp2;
16270 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
16271 rtx (*gen_zero_extend) (rtx, rtx);
16272 rtx (*gen_test_ccno_1) (rtx, rtx);
16274 switch (mode)
16276 case SImode:
16277 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
16278 gen_test_ccno_1 = gen_testsi_ccno_1;
16279 gen_zero_extend = gen_zero_extendqisi2;
16280 break;
16281 case DImode:
16282 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
16283 gen_test_ccno_1 = gen_testdi_ccno_1;
16284 gen_zero_extend = gen_zero_extendqidi2;
16285 break;
16286 default:
16287 gcc_unreachable ();
16290 end_label = gen_label_rtx ();
16291 qimode_label = gen_label_rtx ();
16293 scratch = gen_reg_rtx (mode);
16295 /* Use 8bit unsigned divmod if dividend and divisor are within
16296 the range [0-255]. */
16297 emit_move_insn (scratch, operands[2]);
16298 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
16299 scratch, 1, OPTAB_DIRECT);
16300 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
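/* OR-ing dividend and divisor and testing the result against -0x100
   (i.e. ~0xff) sets ZF exactly when neither value has a bit above bit 7
   set, so both are known to fit in [0, 255].  */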
16301 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
16302 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
16303 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
16304 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
16305 pc_rtx);
16306 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
16307 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16308 JUMP_LABEL (insn) = qimode_label;
16310 /* Generate original signed/unsigned divmod. */
16311 div = gen_divmod4_1 (operands[0], operands[1],
16312 operands[2], operands[3]);
16313 emit_insn (div);
16315 /* Branch to the end. */
16316 emit_jump_insn (gen_jump (end_label));
16317 emit_barrier ();
16319 /* Generate 8bit unsigned divide. */
16320 emit_label (qimode_label);
16321 /* Don't use operands[0] for result of 8bit divide since not all
16322 registers support QImode ZERO_EXTRACT. */
16323 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
16324 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
16325 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
16326 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
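/* The hardware 8-bit divide takes its dividend in AX and leaves the
   quotient in AL and the remainder in AH, so the remainder is extracted
   below as bits 8..15 and the quotient by zero-extending the low
   byte.  */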
16328 if (signed_p)
16330 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
16331 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
16333 else
16335 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
16336 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
16339 /* Extract remainder from AH. */
16340 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
16341 if (REG_P (operands[1]))
16342 insn = emit_move_insn (operands[1], tmp1);
16343 else
16345 /* Need a new scratch register since the old one holds the result
16346 of the 8bit divide. */
16347 scratch = gen_reg_rtx (mode);
16348 emit_move_insn (scratch, tmp1);
16349 insn = emit_move_insn (operands[1], scratch);
16351 set_unique_reg_note (insn, REG_EQUAL, mod);
16353 /* Zero extend quotient from AL. */
16354 tmp1 = gen_lowpart (QImode, tmp0);
16355 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
16356 set_unique_reg_note (insn, REG_EQUAL, div);
16358 emit_label (end_label);
16361 #define LEA_SEARCH_THRESHOLD 12
16363 /* Search backward for non-agu definition of register number REGNO1
16364 or register number REGNO2 in INSN's basic block until
16365 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16366 2. Reach BB boundary, or
16367 3. Reach agu definition.
16368 Returns the distance between the non-agu definition point and INSN.
16369 If no definition point, returns -1. */
16371 static int
16372 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16373 rtx insn)
16375 basic_block bb = BLOCK_FOR_INSN (insn);
16376 int distance = 0;
16377 df_ref *def_rec;
16378 enum attr_type insn_type;
16380 if (insn != BB_HEAD (bb))
16382 rtx prev = PREV_INSN (insn);
16383 while (prev && distance < LEA_SEARCH_THRESHOLD)
16385 if (NONDEBUG_INSN_P (prev))
16387 distance++;
16388 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16389 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16390 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16391 && (regno1 == DF_REF_REGNO (*def_rec)
16392 || regno2 == DF_REF_REGNO (*def_rec)))
16394 insn_type = get_attr_type (prev);
16395 if (insn_type != TYPE_LEA)
16396 goto done;
16399 if (prev == BB_HEAD (bb))
16400 break;
16401 prev = PREV_INSN (prev);
16405 if (distance < LEA_SEARCH_THRESHOLD)
16407 edge e;
16408 edge_iterator ei;
16409 bool simple_loop = false;
16411 FOR_EACH_EDGE (e, ei, bb->preds)
16412 if (e->src == bb)
16414 simple_loop = true;
16415 break;
16418 if (simple_loop)
16420 rtx prev = BB_END (bb);
16421 while (prev
16422 && prev != insn
16423 && distance < LEA_SEARCH_THRESHOLD)
16425 if (NONDEBUG_INSN_P (prev))
16427 distance++;
16428 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16429 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16430 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16431 && (regno1 == DF_REF_REGNO (*def_rec)
16432 || regno2 == DF_REF_REGNO (*def_rec)))
16434 insn_type = get_attr_type (prev);
16435 if (insn_type != TYPE_LEA)
16436 goto done;
16439 prev = PREV_INSN (prev);
16444 distance = -1;
16446 done:
16447 /* get_attr_type may modify recog data. We want to make sure
16448 that recog data is valid for instruction INSN, on which
16449 distance_non_agu_define is called. INSN is unchanged here. */
16450 extract_insn_cached (insn);
16451 return distance;
16454 /* Return the distance between INSN and the next insn that uses
16455 register number REGNO0 in a memory address. Return -1 if no such
16456 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16458 static int
16459 distance_agu_use (unsigned int regno0, rtx insn)
16461 basic_block bb = BLOCK_FOR_INSN (insn);
16462 int distance = 0;
16463 df_ref *def_rec;
16464 df_ref *use_rec;
16466 if (insn != BB_END (bb))
16468 rtx next = NEXT_INSN (insn);
16469 while (next && distance < LEA_SEARCH_THRESHOLD)
16471 if (NONDEBUG_INSN_P (next))
16473 distance++;
16475 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16476 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16477 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16478 && regno0 == DF_REF_REGNO (*use_rec))
16480 /* Return DISTANCE if OP0 is used in a memory
16481 address in NEXT. */
16482 return distance;
16485 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16486 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16487 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16488 && regno0 == DF_REF_REGNO (*def_rec))
16490 /* Return -1 if OP0 is set in NEXT. */
16491 return -1;
16494 if (next == BB_END (bb))
16495 break;
16496 next = NEXT_INSN (next);
16500 if (distance < LEA_SEARCH_THRESHOLD)
16502 edge e;
16503 edge_iterator ei;
16504 bool simple_loop = false;
16506 FOR_EACH_EDGE (e, ei, bb->succs)
16507 if (e->dest == bb)
16509 simple_loop = true;
16510 break;
16513 if (simple_loop)
16515 rtx next = BB_HEAD (bb);
16516 while (next
16517 && next != insn
16518 && distance < LEA_SEARCH_THRESHOLD)
16520 if (NONDEBUG_INSN_P (next))
16522 distance++;
16524 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16525 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16526 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16527 && regno0 == DF_REF_REGNO (*use_rec))
16529 /* Return DISTANCE if OP0 is used in a memory
16530 address in NEXT. */
16531 return distance;
16534 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16535 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16536 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16537 && regno0 == DF_REF_REGNO (*def_rec))
16539 /* Return -1 if OP0 is set in NEXT. */
16540 return -1;
16544 next = NEXT_INSN (next);
16549 return -1;
16552 /* Define this macro to tune LEA priority vs ADD; it takes effect when
16553 there is a dilemma of choosing LEA or ADD.
16554 Negative value: ADD is preferred over LEA
16555 Zero: Neutral
16556 Positive value: LEA is preferred over ADD. */
16557 #define IX86_LEA_PRIORITY 2
16559 /* Return true if it is ok to optimize an ADD operation to LEA
16560 operation to avoid flag register consumption. For most processors,
16561 ADD is faster than LEA. For processors like ATOM, if the
16562 destination register of the LEA holds an actual address which will be
16563 used soon, LEA is better; otherwise ADD is better. */
16565 bool
16566 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16568 unsigned int regno0 = true_regnum (operands[0]);
16569 unsigned int regno1 = true_regnum (operands[1]);
16570 unsigned int regno2 = true_regnum (operands[2]);
16572 /* If a = b + c, (a!=b && a!=c), must use lea form. */
16573 if (regno0 != regno1 && regno0 != regno2)
16574 return true;
16576 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16577 return false;
16578 else
16580 int dist_define, dist_use;
16582 /* Return false if REGNO0 isn't used in a memory address. */
16583 dist_use = distance_agu_use (regno0, insn);
16584 if (dist_use <= 0)
16585 return false;
16587 dist_define = distance_non_agu_define (regno1, regno2, insn);
16588 if (dist_define <= 0)
16589 return true;
16591 /* If this insn has both a backward non-agu dependence and a forward
16592 agu dependence, the one with the shorter distance takes effect. */
16593 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
16594 return false;
16596 return true;
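/* So with IX86_LEA_PRIORITY == 2, a non-AGU definition 3 insns back and
   an address use 8 insns ahead gives 3 + 2 < 8 and the function returns
   false (keep the ADD); if the address use were only 4 insns ahead it
   would return true and an LEA would be used instead.  */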
16600 /* Return true if destination reg of SET_BODY is shift count of
16601 USE_BODY. */
16603 static bool
16604 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16606 rtx set_dest;
16607 rtx shift_rtx;
16608 int i;
16610 /* Retrieve destination of SET_BODY. */
16611 switch (GET_CODE (set_body))
16613 case SET:
16614 set_dest = SET_DEST (set_body);
16615 if (!set_dest || !REG_P (set_dest))
16616 return false;
16617 break;
16618 case PARALLEL:
16619 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16620 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16621 use_body))
16622 return true;
16623 default:
16624 return false;
16625 break;
16628 /* Retrieve shift count of USE_BODY. */
16629 switch (GET_CODE (use_body))
16631 case SET:
16632 shift_rtx = XEXP (use_body, 1);
16633 break;
16634 case PARALLEL:
16635 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16636 if (ix86_dep_by_shift_count_body (set_body,
16637 XVECEXP (use_body, 0, i)))
16638 return true;
16639 default:
16640 return false;
16641 break;
16644 if (shift_rtx
16645 && (GET_CODE (shift_rtx) == ASHIFT
16646 || GET_CODE (shift_rtx) == LSHIFTRT
16647 || GET_CODE (shift_rtx) == ASHIFTRT
16648 || GET_CODE (shift_rtx) == ROTATE
16649 || GET_CODE (shift_rtx) == ROTATERT))
16651 rtx shift_count = XEXP (shift_rtx, 1);
16653 /* Return true if shift count is dest of SET_BODY. */
16654 if (REG_P (shift_count)
16655 && true_regnum (set_dest) == true_regnum (shift_count))
16656 return true;
16659 return false;
16662 /* Return true if destination reg of SET_INSN is shift count of
16663 USE_INSN. */
16665 bool
16666 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16668 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16669 PATTERN (use_insn));
16672 /* Return TRUE or FALSE depending on whether the unary operator meets the
16673 appropriate constraints. */
16675 bool
16676 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16677 enum machine_mode mode ATTRIBUTE_UNUSED,
16678 rtx operands[2] ATTRIBUTE_UNUSED)
16680 /* If one of operands is memory, source and destination must match. */
16681 if ((MEM_P (operands[0])
16682 || MEM_P (operands[1]))
16683 && ! rtx_equal_p (operands[0], operands[1]))
16684 return false;
16685 return true;
16688 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16689 are ok, keeping in mind the possible movddup alternative. */
16691 bool
16692 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16694 if (MEM_P (operands[0]))
16695 return rtx_equal_p (operands[0], operands[1 + high]);
16696 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16697 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16698 return true;
16701 /* Post-reload splitter for converting an SF or DFmode value in an
16702 SSE register into an unsigned SImode. */
16704 void
16705 ix86_split_convert_uns_si_sse (rtx operands[])
16707 enum machine_mode vecmode;
16708 rtx value, large, zero_or_two31, input, two31, x;
16710 large = operands[1];
16711 zero_or_two31 = operands[2];
16712 input = operands[3];
16713 two31 = operands[4];
16714 vecmode = GET_MODE (large);
16715 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16717 /* Load up the value into the low element. We must ensure that the other
16718 elements are valid floats -- zero is the easiest such value. */
16719 if (MEM_P (input))
16721 if (vecmode == V4SFmode)
16722 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16723 else
16724 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16726 else
16728 input = gen_rtx_REG (vecmode, REGNO (input));
16729 emit_move_insn (value, CONST0_RTX (vecmode));
16730 if (vecmode == V4SFmode)
16731 emit_insn (gen_sse_movss (value, value, input));
16732 else
16733 emit_insn (gen_sse2_movsd (value, value, input));
16736 emit_move_insn (large, two31);
16737 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16739 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16740 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16742 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16743 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16745 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16746 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16748 large = gen_rtx_REG (V4SImode, REGNO (large));
16749 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16751 x = gen_rtx_REG (V4SImode, REGNO (value));
16752 if (vecmode == V4SFmode)
16753 emit_insn (gen_sse2_cvttps2dq (x, value));
16754 else
16755 emit_insn (gen_sse2_cvttpd2dq (x, value));
16756 value = x;
16758 emit_insn (gen_xorv4si3 (value, value, large));
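/* Net effect: lanes with input >= 2**31 had 2**31 subtracted before the
   signed cvttps2dq/cvttpd2dq conversion, and LARGE (all-ones shifted to
   bit 31 for exactly those lanes) puts the high bit back with the XOR
   above.  */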
16761 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16762 Expects the 64-bit DImode to be supplied in a pair of integral
16763 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16764 -mfpmath=sse, !optimize_size only. */
16766 void
16767 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16769 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16770 rtx int_xmm, fp_xmm;
16771 rtx biases, exponents;
16772 rtx x;
16774 int_xmm = gen_reg_rtx (V4SImode);
16775 if (TARGET_INTER_UNIT_MOVES)
16776 emit_insn (gen_movdi_to_sse (int_xmm, input));
16777 else if (TARGET_SSE_SPLIT_REGS)
16779 emit_clobber (int_xmm);
16780 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16782 else
16784 x = gen_reg_rtx (V2DImode);
16785 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16786 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16789 x = gen_rtx_CONST_VECTOR (V4SImode,
16790 gen_rtvec (4, GEN_INT (0x43300000UL),
16791 GEN_INT (0x45300000UL),
16792 const0_rtx, const0_rtx));
16793 exponents = validize_mem (force_const_mem (V4SImode, x));
16795 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16796 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16798 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
16799 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16800 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16801 (0x1.0p84 + double(fp_value_hi_xmm)).
16802 Note these exponents differ by 32. */
16804 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16806 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16807 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16808 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16809 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16810 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16811 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16812 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16813 biases = validize_mem (force_const_mem (V2DFmode, biases));
16814 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16816 /* Add the upper and lower DFmode values together. */
16817 if (TARGET_SSE3)
16818 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16819 else
16821 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16822 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16823 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16826 ix86_expand_vector_extract (false, target, fp_xmm, 0);
16829 /* Not used, but eases macroization of patterns. */
16830 void
16831 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16832 rtx input ATTRIBUTE_UNUSED)
16834 gcc_unreachable ();
16837 /* Convert an unsigned SImode value into a DFmode. Only currently used
16838 for SSE, but applicable anywhere. */
16840 void
16841 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16843 REAL_VALUE_TYPE TWO31r;
16844 rtx x, fp;
16846 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16847 NULL, 1, OPTAB_DIRECT);
16849 fp = gen_reg_rtx (DFmode);
16850 emit_insn (gen_floatsidf2 (fp, x));
16852 real_ldexp (&TWO31r, &dconst1, 31);
16853 x = const_double_from_real_value (TWO31r, DFmode);
16855 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16856 if (x != target)
16857 emit_move_insn (target, x);
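/* Adding -2**31 flips the sign bit, so the signed int->double conversion
   sees input - 2**31; adding 2.0**31 back afterwards reconstructs the
   unsigned value exactly, since DFmode can represent every 32-bit
   integer.  */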
16860 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16861 32-bit mode; otherwise we have a direct convert instruction. */
16863 void
16864 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16866 REAL_VALUE_TYPE TWO32r;
16867 rtx fp_lo, fp_hi, x;
16869 fp_lo = gen_reg_rtx (DFmode);
16870 fp_hi = gen_reg_rtx (DFmode);
16872 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16874 real_ldexp (&TWO32r, &dconst1, 32);
16875 x = const_double_from_real_value (TWO32r, DFmode);
16876 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16878 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16880 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16881 0, OPTAB_DIRECT);
16882 if (x != target)
16883 emit_move_insn (target, x);
16886 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16887 For x86_32, -mfpmath=sse, !optimize_size only. */
16888 void
16889 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16891 REAL_VALUE_TYPE ONE16r;
16892 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16894 real_ldexp (&ONE16r, &dconst1, 16);
16895 x = const_double_from_real_value (ONE16r, SFmode);
16896 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16897 NULL, 0, OPTAB_DIRECT);
16898 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16899 NULL, 0, OPTAB_DIRECT);
16900 fp_hi = gen_reg_rtx (SFmode);
16901 fp_lo = gen_reg_rtx (SFmode);
16902 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16903 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16904 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16905 0, OPTAB_DIRECT);
16906 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16907 0, OPTAB_DIRECT);
16908 if (!rtx_equal_p (target, fp_hi))
16909 emit_move_insn (target, fp_hi);
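/* Each 16-bit half converts to SFmode exactly and the multiply by 2**16
   is exact as well, so the only rounding happens in the final
   addition.  */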
16912 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16913 then replicate the value for all elements of the vector
16914 register. */
16917 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16919 rtvec v;
16920 switch (mode)
16922 case V4SImode:
16923 gcc_assert (vect);
16924 v = gen_rtvec (4, value, value, value, value);
16925 return gen_rtx_CONST_VECTOR (V4SImode, v);
16927 case V2DImode:
16928 gcc_assert (vect);
16929 v = gen_rtvec (2, value, value);
16930 return gen_rtx_CONST_VECTOR (V2DImode, v);
16932 case V8SFmode:
16933 if (vect)
16934 v = gen_rtvec (8, value, value, value, value,
16935 value, value, value, value);
16936 else
16937 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16938 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16939 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16940 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16941 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16943 case V4SFmode:
16944 if (vect)
16945 v = gen_rtvec (4, value, value, value, value);
16946 else
16947 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16948 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16949 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16951 case V4DFmode:
16952 if (vect)
16953 v = gen_rtvec (4, value, value, value, value);
16954 else
16955 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16956 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16957 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16959 case V2DFmode:
16960 if (vect)
16961 v = gen_rtvec (2, value, value);
16962 else
16963 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16964 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16966 default:
16967 gcc_unreachable ();
16971 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16972 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16973 for an SSE register. If VECT is true, then replicate the mask for
16974 all elements of the vector register. If INVERT is true, then create
16975 a mask excluding the sign bit. */
16978 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16980 enum machine_mode vec_mode, imode;
16981 HOST_WIDE_INT hi, lo;
16982 int shift = 63;
16983 rtx v;
16984 rtx mask;
16986 /* Find the sign bit, sign extended to 2*HWI. */
16987 switch (mode)
16989 case V4SImode:
16990 case V8SFmode:
16991 case V4SFmode:
16992 vec_mode = mode;
16993 mode = GET_MODE_INNER (mode);
16994 imode = SImode;
16995 lo = 0x80000000, hi = lo < 0;
16996 break;
16998 case V2DImode:
16999 case V4DFmode:
17000 case V2DFmode:
17001 vec_mode = mode;
17002 mode = GET_MODE_INNER (mode);
17003 imode = DImode;
17004 if (HOST_BITS_PER_WIDE_INT >= 64)
17005 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
17006 else
17007 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17008 break;
17010 case TImode:
17011 case TFmode:
17012 vec_mode = VOIDmode;
17013 if (HOST_BITS_PER_WIDE_INT >= 64)
17015 imode = TImode;
17016 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
17018 else
17020 rtvec vec;
17022 imode = DImode;
17023 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17025 if (invert)
17027 lo = ~lo, hi = ~hi;
17028 v = constm1_rtx;
17030 else
17031 v = const0_rtx;
17033 mask = immed_double_const (lo, hi, imode);
17035 vec = gen_rtvec (2, v, mask);
17036 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
17037 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
17039 return v;
17041 break;
17043 default:
17044 gcc_unreachable ();
17047 if (invert)
17048 lo = ~lo, hi = ~hi;
17050 /* Force this value into the low part of a fp vector constant. */
17051 mask = immed_double_const (lo, hi, imode);
17052 mask = gen_lowpart (mode, mask);
17054 if (vec_mode == VOIDmode)
17055 return force_reg (mode, mask);
17057 v = ix86_build_const_vector (vec_mode, vect, mask);
17058 return force_reg (vec_mode, v);
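/* For V2DFmode, for example, this produces a vector of 0x8000000000000000
   masks (just the sign bit), or of 0x7fffffffffffffff when INVERT is set,
   as used by ABS below.  */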
17061 /* Generate code for floating point ABS or NEG. */
17063 void
17064 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
17065 rtx operands[])
17067 rtx mask, set, dst, src;
17068 bool use_sse = false;
17069 bool vector_mode = VECTOR_MODE_P (mode);
17070 enum machine_mode vmode = mode;
17072 if (vector_mode)
17073 use_sse = true;
17074 else if (mode == TFmode)
17075 use_sse = true;
17076 else if (TARGET_SSE_MATH)
17078 use_sse = SSE_FLOAT_MODE_P (mode);
17079 if (mode == SFmode)
17080 vmode = V4SFmode;
17081 else if (mode == DFmode)
17082 vmode = V2DFmode;
17085 /* NEG and ABS performed with SSE use bitwise mask operations.
17086 Create the appropriate mask now. */
17087 if (use_sse)
17088 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
17089 else
17090 mask = NULL_RTX;
17092 dst = operands[0];
17093 src = operands[1];
17095 set = gen_rtx_fmt_e (code, mode, src);
17096 set = gen_rtx_SET (VOIDmode, dst, set);
17098 if (mask)
17100 rtx use, clob;
17101 rtvec par;
17103 use = gen_rtx_USE (VOIDmode, mask);
17104 if (vector_mode)
17105 par = gen_rtvec (2, set, use);
17106 else
17108 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17109 par = gen_rtvec (3, set, use, clob);
17111 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
17113 else
17114 emit_insn (set);
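/* With SSE the patterns emitted here end up as a bitwise XOR with the
   sign-bit mask for NEG and an AND with the inverted mask for ABS
   (e.g. xorps/andps for SFmode, xorpd/andpd for DFmode).  */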
17117 /* Expand a copysign operation. Special case operand 0 being a constant. */
17119 void
17120 ix86_expand_copysign (rtx operands[])
17122 enum machine_mode mode, vmode;
17123 rtx dest, op0, op1, mask, nmask;
17125 dest = operands[0];
17126 op0 = operands[1];
17127 op1 = operands[2];
17129 mode = GET_MODE (dest);
17131 if (mode == SFmode)
17132 vmode = V4SFmode;
17133 else if (mode == DFmode)
17134 vmode = V2DFmode;
17135 else
17136 vmode = mode;
17138 if (GET_CODE (op0) == CONST_DOUBLE)
17140 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
17142 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
17143 op0 = simplify_unary_operation (ABS, mode, op0, mode);
17145 if (mode == SFmode || mode == DFmode)
17147 if (op0 == CONST0_RTX (mode))
17148 op0 = CONST0_RTX (vmode);
17149 else
17151 rtx v = ix86_build_const_vector (vmode, false, op0);
17153 op0 = force_reg (vmode, v);
17156 else if (op0 != CONST0_RTX (mode))
17157 op0 = force_reg (mode, op0);
17159 mask = ix86_build_signbit_mask (vmode, 0, 0);
17161 if (mode == SFmode)
17162 copysign_insn = gen_copysignsf3_const;
17163 else if (mode == DFmode)
17164 copysign_insn = gen_copysigndf3_const;
17165 else
17166 copysign_insn = gen_copysigntf3_const;
17168 emit_insn (copysign_insn (dest, op0, op1, mask));
17170 else
17172 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
17174 nmask = ix86_build_signbit_mask (vmode, 0, 1);
17175 mask = ix86_build_signbit_mask (vmode, 0, 0);
17177 if (mode == SFmode)
17178 copysign_insn = gen_copysignsf3_var;
17179 else if (mode == DFmode)
17180 copysign_insn = gen_copysigndf3_var;
17181 else
17182 copysign_insn = gen_copysigntf3_var;
17184 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
17188 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17189 be a constant, and so has already been expanded into a vector constant. */
17191 void
17192 ix86_split_copysign_const (rtx operands[])
17194 enum machine_mode mode, vmode;
17195 rtx dest, op0, mask, x;
17197 dest = operands[0];
17198 op0 = operands[1];
17199 mask = operands[3];
17201 mode = GET_MODE (dest);
17202 vmode = GET_MODE (mask);
17204 dest = simplify_gen_subreg (vmode, dest, mode, 0);
17205 x = gen_rtx_AND (vmode, dest, mask);
17206 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17208 if (op0 != CONST0_RTX (vmode))
17210 x = gen_rtx_IOR (vmode, dest, op0);
17211 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17215 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17216 so we have to do two masks. */
17218 void
17219 ix86_split_copysign_var (rtx operands[])
17221 enum machine_mode mode, vmode;
17222 rtx dest, scratch, op0, op1, mask, nmask, x;
17224 dest = operands[0];
17225 scratch = operands[1];
17226 op0 = operands[2];
17227 op1 = operands[3];
17228 nmask = operands[4];
17229 mask = operands[5];
17231 mode = GET_MODE (dest);
17232 vmode = GET_MODE (mask);
17234 if (rtx_equal_p (op0, op1))
17236 /* Shouldn't happen often (it's useless, obviously), but when it does
17237 we'd generate incorrect code if we continue below. */
17238 emit_move_insn (dest, op0);
17239 return;
17242 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
17244 gcc_assert (REGNO (op1) == REGNO (scratch));
17246 x = gen_rtx_AND (vmode, scratch, mask);
17247 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17249 dest = mask;
17250 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17251 x = gen_rtx_NOT (vmode, dest);
17252 x = gen_rtx_AND (vmode, x, op0);
17253 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17255 else
17257 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
17259 x = gen_rtx_AND (vmode, scratch, mask);
17261 else /* alternative 2,4 */
17263 gcc_assert (REGNO (mask) == REGNO (scratch));
17264 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
17265 x = gen_rtx_AND (vmode, scratch, op1);
17267 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17269 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
17271 dest = simplify_gen_subreg (vmode, op0, mode, 0);
17272 x = gen_rtx_AND (vmode, dest, nmask);
17274 else /* alternative 3,4 */
17276 gcc_assert (REGNO (nmask) == REGNO (dest));
17277 dest = nmask;
17278 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17279 x = gen_rtx_AND (vmode, dest, op0);
17281 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17284 x = gen_rtx_IOR (vmode, dest, scratch);
17285 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17288 /* Return TRUE or FALSE depending on whether the first SET in INSN
17289 has source and destination with matching CC modes, and that the
17290 CC mode is at least as constrained as REQ_MODE. */
17292 bool
17293 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
17295 rtx set;
17296 enum machine_mode set_mode;
17298 set = PATTERN (insn);
17299 if (GET_CODE (set) == PARALLEL)
17300 set = XVECEXP (set, 0, 0);
17301 gcc_assert (GET_CODE (set) == SET);
17302 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
17304 set_mode = GET_MODE (SET_DEST (set));
17305 switch (set_mode)
17307 case CCNOmode:
17308 if (req_mode != CCNOmode
17309 && (req_mode != CCmode
17310 || XEXP (SET_SRC (set), 1) != const0_rtx))
17311 return false;
17312 break;
17313 case CCmode:
17314 if (req_mode == CCGCmode)
17315 return false;
17316 /* FALLTHRU */
17317 case CCGCmode:
17318 if (req_mode == CCGOCmode || req_mode == CCNOmode)
17319 return false;
17320 /* FALLTHRU */
17321 case CCGOCmode:
17322 if (req_mode == CCZmode)
17323 return false;
17324 /* FALLTHRU */
17325 case CCZmode:
17326 break;
17328 case CCAmode:
17329 case CCCmode:
17330 case CCOmode:
17331 case CCSmode:
17332 if (set_mode != req_mode)
17333 return false;
17334 break;
17336 default:
17337 gcc_unreachable ();
17340 return GET_MODE (SET_SRC (set)) == set_mode;
17343 /* Generate insn patterns to do an integer compare of OPERANDS. */
17345 static rtx
17346 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
17348 enum machine_mode cmpmode;
17349 rtx tmp, flags;
17351 cmpmode = SELECT_CC_MODE (code, op0, op1);
17352 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
17354 /* This is very simple, but making the interface the same as in the
17355 FP case makes the rest of the code easier. */
17356 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
17357 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
17359 /* Return the test that should be put into the flags user, i.e.
17360 the bcc, scc, or cmov instruction. */
17361 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
17364 /* Figure out whether to use ordered or unordered fp comparisons.
17365 Return the appropriate mode to use. */
17367 enum machine_mode
17368 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
17370 /* ??? In order to make all comparisons reversible, we do all comparisons
17371 non-trapping when compiling for IEEE. Once gcc is able to distinguish
17372 all forms of trapping and non-trapping comparisons, we can make inequality
17373 comparisons trapping again, since it results in better code when using
17374 FCOM based compares. */
17375 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
17378 enum machine_mode
17379 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17381 enum machine_mode mode = GET_MODE (op0);
17383 if (SCALAR_FLOAT_MODE_P (mode))
17385 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17386 return ix86_fp_compare_mode (code);
17389 switch (code)
17391 /* Only zero flag is needed. */
17392 case EQ: /* ZF=0 */
17393 case NE: /* ZF!=0 */
17394 return CCZmode;
17395 /* Codes needing carry flag. */
17396 case GEU: /* CF=0 */
17397 case LTU: /* CF=1 */
17398 /* Detect overflow checks. They need just the carry flag. */
17399 if (GET_CODE (op0) == PLUS
17400 && rtx_equal_p (op1, XEXP (op0, 0)))
17401 return CCCmode;
17402 else
17403 return CCmode;
17404 case GTU: /* CF=0 & ZF=0 */
17405 case LEU: /* CF=1 | ZF=1 */
17406 /* Detect overflow checks. They need just the carry flag. */
17407 if (GET_CODE (op0) == MINUS
17408 && rtx_equal_p (op1, XEXP (op0, 0)))
17409 return CCCmode;
17410 else
17411 return CCmode;
17412 /* Codes possibly doable only with sign flag when
17413 comparing against zero. */
17414 case GE: /* SF=OF or SF=0 */
17415 case LT: /* SF<>OF or SF=1 */
17416 if (op1 == const0_rtx)
17417 return CCGOCmode;
17418 else
17419 /* For other cases Carry flag is not required. */
17420 return CCGCmode;
17421 /* Codes doable only with sign flag when comparing
17422 against zero, but we lack a jump instruction for it,
17423 so we need to use relational tests against overflow,
17424 which therefore must be zero. */
17425 case GT: /* ZF=0 & SF=OF */
17426 case LE: /* ZF=1 | SF<>OF */
17427 if (op1 == const0_rtx)
17428 return CCNOmode;
17429 else
17430 return CCGCmode;
17431 /* The strcmp pattern does (use flags), and combine may ask us for the
17432 proper mode. */
17433 case USE:
17434 return CCmode;
17435 default:
17436 gcc_unreachable ();
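/* Added illustrative sketch (annotation, not part of GCC): the kind of
   source-level overflow and borrow checks that the CCCmode cases above
   recognize.  Both tests need only the carry flag, since they compare a
   PLUS or MINUS against its own first operand.  The sketch_* helpers are
   hypothetical.  */

static int
sketch_add_overflows (unsigned int a, unsigned int b)
{
  return a + b < a;	/* GEU/LTU of (a + b) against a.  */
}

static int
sketch_sub_borrows (unsigned int a, unsigned int b)
{
  return a - b > a;	/* GTU/LEU of (a - b) against a.  */
}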
17440 /* Return the fixed registers used for condition codes. */
17442 static bool
17443 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17445 *p1 = FLAGS_REG;
17446 *p2 = FPSR_REG;
17447 return true;
17450 /* If two condition code modes are compatible, return a condition code
17451 mode which is compatible with both. Otherwise, return
17452 VOIDmode. */
17454 static enum machine_mode
17455 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17457 if (m1 == m2)
17458 return m1;
17460 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17461 return VOIDmode;
17463 if ((m1 == CCGCmode && m2 == CCGOCmode)
17464 || (m1 == CCGOCmode && m2 == CCGCmode))
17465 return CCGCmode;
17467 switch (m1)
17469 default:
17470 gcc_unreachable ();
17472 case CCmode:
17473 case CCGCmode:
17474 case CCGOCmode:
17475 case CCNOmode:
17476 case CCAmode:
17477 case CCCmode:
17478 case CCOmode:
17479 case CCSmode:
17480 case CCZmode:
17481 switch (m2)
17483 default:
17484 return VOIDmode;
17486 case CCmode:
17487 case CCGCmode:
17488 case CCGOCmode:
17489 case CCNOmode:
17490 case CCAmode:
17491 case CCCmode:
17492 case CCOmode:
17493 case CCSmode:
17494 case CCZmode:
17495 return CCmode;
17498 case CCFPmode:
17499 case CCFPUmode:
17500 /* These are only compatible with themselves, which we already
17501 checked above. */
17502 return VOIDmode;
17507 /* Return a comparison we can do that is equivalent to
17508 swap_condition (code), apart possibly from orderedness.
17509 Never change orderedness if TARGET_IEEE_FP, returning
17510 UNKNOWN in that case if necessary. */
17512 static enum rtx_code
17513 ix86_fp_swap_condition (enum rtx_code code)
17515 switch (code)
17517 case GT: /* GTU - CF=0 & ZF=0 */
17518 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17519 case GE: /* GEU - CF=0 */
17520 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17521 case UNLT: /* LTU - CF=1 */
17522 return TARGET_IEEE_FP ? UNKNOWN : GT;
17523 case UNLE: /* LEU - CF=1 | ZF=1 */
17524 return TARGET_IEEE_FP ? UNKNOWN : GE;
17525 default:
17526 return swap_condition (code);
17530 /* Return cost of comparison CODE using the best strategy for performance.
17531 All following functions use the number of instructions as a cost metric.
17532 In future this should be tweaked to compute bytes for optimize_size and
17533 take into account performance of various instructions on various CPUs. */
17535 static int
17536 ix86_fp_comparison_cost (enum rtx_code code)
17538 int arith_cost;
17540 /* The cost of code using bit-twiddling on %ah. */
17541 switch (code)
17543 case UNLE:
17544 case UNLT:
17545 case LTGT:
17546 case GT:
17547 case GE:
17548 case UNORDERED:
17549 case ORDERED:
17550 case UNEQ:
17551 arith_cost = 4;
17552 break;
17553 case LT:
17554 case NE:
17555 case EQ:
17556 case UNGE:
17557 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17558 break;
17559 case LE:
17560 case UNGT:
17561 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17562 break;
17563 default:
17564 gcc_unreachable ();
17567 switch (ix86_fp_comparison_strategy (code))
17569 case IX86_FPCMP_COMI:
17570 return arith_cost > 4 ? 3 : 2;
17571 case IX86_FPCMP_SAHF:
17572 return arith_cost > 4 ? 4 : 3;
17573 default:
17574 return arith_cost;
17578 /* Return strategy to use for floating-point. We assume that fcomi is always
17579 preferable where available, since that is also true when looking at size
17580 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17582 enum ix86_fpcmp_strategy
17583 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17585 /* Do fcomi/sahf based test when profitable. */
17587 if (TARGET_CMOVE)
17588 return IX86_FPCMP_COMI;
17590 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17591 return IX86_FPCMP_SAHF;
17593 return IX86_FPCMP_ARITH;
17596 /* Swap, force into registers, or otherwise massage the two operands
17597 to a fp comparison. The operands are updated in place; the new
17598 comparison code is returned. */
17600 static enum rtx_code
17601 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17603 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17604 rtx op0 = *pop0, op1 = *pop1;
17605 enum machine_mode op_mode = GET_MODE (op0);
17606 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17608 /* All of the unordered compare instructions only work on registers.
17609 The same is true of the fcomi compare instructions. The XFmode
17610 compare instructions require registers except when comparing
17611 against zero or when converting operand 1 from fixed point to
17612 floating point. */
17614 if (!is_sse
17615 && (fpcmp_mode == CCFPUmode
17616 || (op_mode == XFmode
17617 && ! (standard_80387_constant_p (op0) == 1
17618 || standard_80387_constant_p (op1) == 1)
17619 && GET_CODE (op1) != FLOAT)
17620 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17622 op0 = force_reg (op_mode, op0);
17623 op1 = force_reg (op_mode, op1);
17625 else
17627 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17628 things around if they appear profitable, otherwise force op0
17629 into a register. */
17631 if (standard_80387_constant_p (op0) == 0
17632 || (MEM_P (op0)
17633 && ! (standard_80387_constant_p (op1) == 0
17634 || MEM_P (op1))))
17636 enum rtx_code new_code = ix86_fp_swap_condition (code);
17637 if (new_code != UNKNOWN)
17639 rtx tmp;
17640 tmp = op0, op0 = op1, op1 = tmp;
17641 code = new_code;
17645 if (!REG_P (op0))
17646 op0 = force_reg (op_mode, op0);
17648 if (CONSTANT_P (op1))
17650 int tmp = standard_80387_constant_p (op1);
17651 if (tmp == 0)
17652 op1 = validize_mem (force_const_mem (op_mode, op1));
17653 else if (tmp == 1)
17655 if (TARGET_CMOVE)
17656 op1 = force_reg (op_mode, op1);
17658 else
17659 op1 = force_reg (op_mode, op1);
17663 /* Try to rearrange the comparison to make it cheaper. */
17664 if (ix86_fp_comparison_cost (code)
17665 > ix86_fp_comparison_cost (swap_condition (code))
17666 && (REG_P (op1) || can_create_pseudo_p ()))
17668 rtx tmp;
17669 tmp = op0, op0 = op1, op1 = tmp;
17670 code = swap_condition (code);
17671 if (!REG_P (op0))
17672 op0 = force_reg (op_mode, op0);
17675 *pop0 = op0;
17676 *pop1 = op1;
17677 return code;
17680 /* Convert comparison codes we use to represent FP comparison to integer
17681 code that will result in proper branch. Return UNKNOWN if no such code
17682 is available. */
17684 enum rtx_code
17685 ix86_fp_compare_code_to_integer (enum rtx_code code)
17687 switch (code)
17689 case GT:
17690 return GTU;
17691 case GE:
17692 return GEU;
17693 case ORDERED:
17694 case UNORDERED:
17695 return code;
17696 break;
17697 case UNEQ:
17698 return EQ;
17699 break;
17700 case UNLT:
17701 return LTU;
17702 break;
17703 case UNLE:
17704 return LEU;
17705 break;
17706 case LTGT:
17707 return NE;
17708 break;
17709 default:
17710 return UNKNOWN;
17714 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17716 static rtx
17717 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17719 enum machine_mode fpcmp_mode, intcmp_mode;
17720 rtx tmp, tmp2;
17722 fpcmp_mode = ix86_fp_compare_mode (code);
17723 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17725 /* Do fcomi/sahf based test when profitable. */
17726 switch (ix86_fp_comparison_strategy (code))
17728 case IX86_FPCMP_COMI:
17729 intcmp_mode = fpcmp_mode;
17730 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17731 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17732 tmp);
17733 emit_insn (tmp);
17734 break;
17736 case IX86_FPCMP_SAHF:
17737 intcmp_mode = fpcmp_mode;
17738 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17739 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17740 tmp);
17742 if (!scratch)
17743 scratch = gen_reg_rtx (HImode);
17744 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17745 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17746 break;
17748 case IX86_FPCMP_ARITH:
17749 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17750 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17751 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17752 if (!scratch)
17753 scratch = gen_reg_rtx (HImode);
17754 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17756 /* In the unordered case, we have to check C2 for NaN's, which
17757 doesn't happen to work out to anything nice combination-wise.
17758 So do some bit twiddling on the value we've got in AH to come
17759 up with an appropriate set of condition codes. */
17761 intcmp_mode = CCNOmode;
17762 switch (code)
17764 case GT:
17765 case UNGT:
17766 if (code == GT || !TARGET_IEEE_FP)
17768 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17769 code = EQ;
17771 else
17773 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17774 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17775 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17776 intcmp_mode = CCmode;
17777 code = GEU;
17779 break;
17780 case LT:
17781 case UNLT:
17782 if (code == LT && TARGET_IEEE_FP)
17784 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17785 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17786 intcmp_mode = CCmode;
17787 code = EQ;
17789 else
17791 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17792 code = NE;
17794 break;
17795 case GE:
17796 case UNGE:
17797 if (code == GE || !TARGET_IEEE_FP)
17799 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17800 code = EQ;
17802 else
17804 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17805 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17806 code = NE;
17808 break;
17809 case LE:
17810 case UNLE:
17811 if (code == LE && TARGET_IEEE_FP)
17813 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17814 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17815 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17816 intcmp_mode = CCmode;
17817 code = LTU;
17819 else
17821 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17822 code = NE;
17824 break;
17825 case EQ:
17826 case UNEQ:
17827 if (code == EQ && TARGET_IEEE_FP)
17829 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17830 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17831 intcmp_mode = CCmode;
17832 code = EQ;
17834 else
17836 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17837 code = NE;
17839 break;
17840 case NE:
17841 case LTGT:
17842 if (code == NE && TARGET_IEEE_FP)
17844 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17845 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17846 GEN_INT (0x40)));
17847 code = NE;
17849 else
17851 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17852 code = EQ;
17854 break;
17856 case UNORDERED:
17857 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17858 code = NE;
17859 break;
17860 case ORDERED:
17861 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17862 code = EQ;
17863 break;
17865 default:
17866 gcc_unreachable ();
17868 break;
17870 default:
17871 gcc_unreachable();
17874 /* Return the test that should be put into the flags user, i.e.
17875 the bcc, scc, or cmov instruction. */
17876 return gen_rtx_fmt_ee (code, VOIDmode,
17877 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17878 const0_rtx);
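/* Added illustrative sketch (annotation, not part of GCC): what the AH
   masks used above select.  After fnstsw, AH holds C0 (0x01), C2 (0x04)
   and C3 (0x40); 0x45 tests all three, 0x05 drops C3, 0x40 tests only C3
   (equality) and 0x04 only C2 (unordered).  For example, the non-IEEE GT
   case is "none of C0/C2/C3 set", which the hypothetical helper below
   spells out.  */

static int
sketch_fcom_gt (unsigned char ah)
{
  return (ah & 0x45) == 0;	/* test $0x45, %ah ; sete  */
}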
17881 static rtx
17882 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17884 rtx ret;
17886 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17887 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17889 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17891 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17892 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17894 else
17895 ret = ix86_expand_int_compare (code, op0, op1);
17897 return ret;
17900 void
17901 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17903 enum machine_mode mode = GET_MODE (op0);
17904 rtx tmp;
17906 switch (mode)
17908 case SFmode:
17909 case DFmode:
17910 case XFmode:
17911 case QImode:
17912 case HImode:
17913 case SImode:
17914 simple:
17915 tmp = ix86_expand_compare (code, op0, op1);
17916 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17917 gen_rtx_LABEL_REF (VOIDmode, label),
17918 pc_rtx);
17919 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17920 return;
17922 case DImode:
17923 if (TARGET_64BIT)
17924 goto simple;
17925 case TImode:
17926 /* Expand DImode branch into multiple compare+branch. */
17928 rtx lo[2], hi[2], label2;
17929 enum rtx_code code1, code2, code3;
17930 enum machine_mode submode;
17932 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17934 tmp = op0, op0 = op1, op1 = tmp;
17935 code = swap_condition (code);
17938 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17939 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17941 submode = mode == DImode ? SImode : DImode;
17943 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17944 avoid two branches. This costs one extra insn, so disable when
17945 optimizing for size. */
17947 if ((code == EQ || code == NE)
17948 && (!optimize_insn_for_size_p ()
17949 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17951 rtx xor0, xor1;
17953 xor1 = hi[0];
17954 if (hi[1] != const0_rtx)
17955 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17956 NULL_RTX, 0, OPTAB_WIDEN);
17958 xor0 = lo[0];
17959 if (lo[1] != const0_rtx)
17960 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17961 NULL_RTX, 0, OPTAB_WIDEN);
17963 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17964 NULL_RTX, 0, OPTAB_WIDEN);
17966 ix86_expand_branch (code, tmp, const0_rtx, label);
17967 return;
17970 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
17971 op1 is a constant and the low word is zero, then we can just
17972 examine the high word. Similarly for a low word of -1 and
17973 less-or-equal or greater-than. */
17975 if (CONST_INT_P (hi[1]))
17976 switch (code)
17978 case LT: case LTU: case GE: case GEU:
17979 if (lo[1] == const0_rtx)
17981 ix86_expand_branch (code, hi[0], hi[1], label);
17982 return;
17984 break;
17985 case LE: case LEU: case GT: case GTU:
17986 if (lo[1] == constm1_rtx)
17988 ix86_expand_branch (code, hi[0], hi[1], label);
17989 return;
17991 break;
17992 default:
17993 break;
17996 /* Otherwise, we need two or three jumps. */
17998 label2 = gen_label_rtx ();
18000 code1 = code;
18001 code2 = swap_condition (code);
18002 code3 = unsigned_condition (code);
18004 switch (code)
18006 case LT: case GT: case LTU: case GTU:
18007 break;
18009 case LE: code1 = LT; code2 = GT; break;
18010 case GE: code1 = GT; code2 = LT; break;
18011 case LEU: code1 = LTU; code2 = GTU; break;
18012 case GEU: code1 = GTU; code2 = LTU; break;
18014 case EQ: code1 = UNKNOWN; code2 = NE; break;
18015 case NE: code2 = UNKNOWN; break;
18017 default:
18018 gcc_unreachable ();
18022 * a < b =>
18023 * if (hi(a) < hi(b)) goto true;
18024 * if (hi(a) > hi(b)) goto false;
18025 * if (lo(a) < lo(b)) goto true;
18026 * false:
18029 if (code1 != UNKNOWN)
18030 ix86_expand_branch (code1, hi[0], hi[1], label);
18031 if (code2 != UNKNOWN)
18032 ix86_expand_branch (code2, hi[0], hi[1], label2);
18034 ix86_expand_branch (code3, lo[0], lo[1], label);
18036 if (code2 != UNKNOWN)
18037 emit_label (label2);
18038 return;
18041 default:
18042 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
18043 goto simple;
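/* Added illustrative sketch (annotation, not part of GCC): how the
   double-word branch expansion above decomposes a DImode comparison on a
   32-bit target.  Equality ORs together the XORs of both halves; ordered
   comparisons test the high words first (code1/code2) and finish with an
   unsigned compare of the low words (code3).  The sketch_* helpers are
   hypothetical.  */

static int
sketch_di_eq (unsigned int lo0, unsigned int hi0,
	      unsigned int lo1, unsigned int hi1)
{
  return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
}

static int
sketch_di_lt (int hi0, unsigned int lo0, int hi1, unsigned int lo1)
{
  if (hi0 < hi1)
    return 1;		/* code1: signed compare of the high words.  */
  if (hi0 > hi1)
    return 0;		/* code2: the swapped condition.  */
  return lo0 < lo1;	/* code3: unsigned compare of the low words.  */
}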
18047 /* Split branch based on floating point condition. */
18048 void
18049 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
18050 rtx target1, rtx target2, rtx tmp, rtx pushed)
18052 rtx condition;
18053 rtx i;
18055 if (target2 != pc_rtx)
18057 rtx tmp = target2;
18058 code = reverse_condition_maybe_unordered (code);
18059 target2 = target1;
18060 target1 = tmp;
18063 condition = ix86_expand_fp_compare (code, op1, op2,
18064 tmp);
18066 /* Remove pushed operand from stack. */
18067 if (pushed)
18068 ix86_free_from_memory (GET_MODE (pushed));
18070 i = emit_jump_insn (gen_rtx_SET
18071 (VOIDmode, pc_rtx,
18072 gen_rtx_IF_THEN_ELSE (VOIDmode,
18073 condition, target1, target2)));
18074 if (split_branch_probability >= 0)
18075 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
18078 void
18079 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
18081 rtx ret;
18083 gcc_assert (GET_MODE (dest) == QImode);
18085 ret = ix86_expand_compare (code, op0, op1);
18086 PUT_MODE (ret, QImode);
18087 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
18090 /* Expand comparison setting or clearing carry flag. Return true when
18091 successful and set pop for the operation. */
18092 static bool
18093 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
18095 enum machine_mode mode =
18096 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
18098 /* Do not handle double-mode compares that go through the special path. */
18099 if (mode == (TARGET_64BIT ? TImode : DImode))
18100 return false;
18102 if (SCALAR_FLOAT_MODE_P (mode))
18104 rtx compare_op, compare_seq;
18106 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
18108 /* Shortcut: following common codes never translate
18109 into carry flag compares. */
18110 if (code == EQ || code == NE || code == UNEQ || code == LTGT
18111 || code == ORDERED || code == UNORDERED)
18112 return false;
18114 /* These comparisons require the zero flag; swap operands so they won't need it. */
18115 if ((code == GT || code == UNLE || code == LE || code == UNGT)
18116 && !TARGET_IEEE_FP)
18118 rtx tmp = op0;
18119 op0 = op1;
18120 op1 = tmp;
18121 code = swap_condition (code);
18124 /* Try to expand the comparison and verify that we end up with a
18125 carry-flag based comparison. This fails only when we decide to
18126 expand the comparison using arithmetic, which is not a very
18127 common scenario. */
18128 start_sequence ();
18129 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18130 compare_seq = get_insns ();
18131 end_sequence ();
18133 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
18134 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
18135 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
18136 else
18137 code = GET_CODE (compare_op);
18139 if (code != LTU && code != GEU)
18140 return false;
18142 emit_insn (compare_seq);
18143 *pop = compare_op;
18144 return true;
18147 if (!INTEGRAL_MODE_P (mode))
18148 return false;
18150 switch (code)
18152 case LTU:
18153 case GEU:
18154 break;
18156 /* Convert a==0 into (unsigned)a<1. */
18157 case EQ:
18158 case NE:
18159 if (op1 != const0_rtx)
18160 return false;
18161 op1 = const1_rtx;
18162 code = (code == EQ ? LTU : GEU);
18163 break;
18165 /* Convert a>b into b<a or a>=b-1. */
18166 case GTU:
18167 case LEU:
18168 if (CONST_INT_P (op1))
18170 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
18171 /* Bail out on overflow. We still can swap operands but that
18172 would force loading of the constant into a register. */
18173 if (op1 == const0_rtx
18174 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
18175 return false;
18176 code = (code == GTU ? GEU : LTU);
18178 else
18180 rtx tmp = op1;
18181 op1 = op0;
18182 op0 = tmp;
18183 code = (code == GTU ? LTU : GEU);
18185 break;
18187 /* Convert a>=0 into (unsigned)a<0x80000000. */
18188 case LT:
18189 case GE:
18190 if (mode == DImode || op1 != const0_rtx)
18191 return false;
18192 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18193 code = (code == LT ? GEU : LTU);
18194 break;
18195 case LE:
18196 case GT:
18197 if (mode == DImode || op1 != constm1_rtx)
18198 return false;
18199 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18200 code = (code == LE ? GEU : LTU);
18201 break;
18203 default:
18204 return false;
18206 /* Swapping operands may cause a constant to appear as the first operand. */
18207 if (!nonimmediate_operand (op0, VOIDmode))
18209 if (!can_create_pseudo_p ())
18210 return false;
18211 op0 = force_reg (mode, op0);
18213 *pop = ix86_expand_compare (code, op0, op1);
18214 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
18215 return true;
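/* Added illustrative sketch (annotation, not part of GCC): the scalar
   identities used above to turn comparisons into carry-flag (LTU/GEU)
   tests.  The hypothetical helper evaluates to 1 for any inputs (the
   constant-increment identity assumes c + 1 does not overflow, which the
   code above checks).  */

static int
sketch_carry_flag_identities (unsigned int a, unsigned int b, int s)
{
  int ok = 1;
  ok &= (a == 0) == (a < 1u);			/* a == 0  ->  (unsigned) a < 1  */
  ok &= (a > b) == (b < a);			/* a > b   ->  b < a (swap operands)  */
  ok &= (a > 41u) == (a >= 42u);		/* a > c   ->  a >= c + 1 for constant c  */
  ok &= (s >= 0) == ((unsigned int) s < 0x80000000u);	/* s >= 0  ->  (unsigned) s < 2^31  */
  return ok;
}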
18218 bool
18219 ix86_expand_int_movcc (rtx operands[])
18221 enum rtx_code code = GET_CODE (operands[1]), compare_code;
18222 rtx compare_seq, compare_op;
18223 enum machine_mode mode = GET_MODE (operands[0]);
18224 bool sign_bit_compare_p = false;
18225 rtx op0 = XEXP (operands[1], 0);
18226 rtx op1 = XEXP (operands[1], 1);
18228 start_sequence ();
18229 compare_op = ix86_expand_compare (code, op0, op1);
18230 compare_seq = get_insns ();
18231 end_sequence ();
18233 compare_code = GET_CODE (compare_op);
18235 if ((op1 == const0_rtx && (code == GE || code == LT))
18236 || (op1 == constm1_rtx && (code == GT || code == LE)))
18237 sign_bit_compare_p = true;
18239 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18240 HImode insns, we'd be swallowed in word prefix ops. */
18242 if ((mode != HImode || TARGET_FAST_PREFIX)
18243 && (mode != (TARGET_64BIT ? TImode : DImode))
18244 && CONST_INT_P (operands[2])
18245 && CONST_INT_P (operands[3]))
18247 rtx out = operands[0];
18248 HOST_WIDE_INT ct = INTVAL (operands[2]);
18249 HOST_WIDE_INT cf = INTVAL (operands[3]);
18250 HOST_WIDE_INT diff;
18252 diff = ct - cf;
18253 /* Sign-bit compares are better done using shifts than by using
18254 sbb. */
18255 if (sign_bit_compare_p
18256 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18258 /* Detect overlap between destination and compare sources. */
18259 rtx tmp = out;
18261 if (!sign_bit_compare_p)
18263 rtx flags;
18264 bool fpcmp = false;
18266 compare_code = GET_CODE (compare_op);
18268 flags = XEXP (compare_op, 0);
18270 if (GET_MODE (flags) == CCFPmode
18271 || GET_MODE (flags) == CCFPUmode)
18273 fpcmp = true;
18274 compare_code
18275 = ix86_fp_compare_code_to_integer (compare_code);
18278 /* To simplify the rest of the code, restrict to the GEU case. */
18279 if (compare_code == LTU)
18281 HOST_WIDE_INT tmp = ct;
18282 ct = cf;
18283 cf = tmp;
18284 compare_code = reverse_condition (compare_code);
18285 code = reverse_condition (code);
18287 else
18289 if (fpcmp)
18290 PUT_CODE (compare_op,
18291 reverse_condition_maybe_unordered
18292 (GET_CODE (compare_op)));
18293 else
18294 PUT_CODE (compare_op,
18295 reverse_condition (GET_CODE (compare_op)));
18297 diff = ct - cf;
18299 if (reg_overlap_mentioned_p (out, op0)
18300 || reg_overlap_mentioned_p (out, op1))
18301 tmp = gen_reg_rtx (mode);
18303 if (mode == DImode)
18304 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
18305 else
18306 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
18307 flags, compare_op));
18309 else
18311 if (code == GT || code == GE)
18312 code = reverse_condition (code);
18313 else
18315 HOST_WIDE_INT tmp = ct;
18316 ct = cf;
18317 cf = tmp;
18318 diff = ct - cf;
18320 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
18323 if (diff == 1)
18326 * cmpl op0,op1
18327 * sbbl dest,dest
18328 * [addl dest, ct]
18330 * Size 5 - 8.
18332 if (ct)
18333 tmp = expand_simple_binop (mode, PLUS,
18334 tmp, GEN_INT (ct),
18335 copy_rtx (tmp), 1, OPTAB_DIRECT);
18337 else if (cf == -1)
18340 * cmpl op0,op1
18341 * sbbl dest,dest
18342 * orl $ct, dest
18344 * Size 8.
18346 tmp = expand_simple_binop (mode, IOR,
18347 tmp, GEN_INT (ct),
18348 copy_rtx (tmp), 1, OPTAB_DIRECT);
18350 else if (diff == -1 && ct)
18353 * cmpl op0,op1
18354 * sbbl dest,dest
18355 * notl dest
18356 * [addl dest, cf]
18358 * Size 8 - 11.
18360 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18361 if (cf)
18362 tmp = expand_simple_binop (mode, PLUS,
18363 copy_rtx (tmp), GEN_INT (cf),
18364 copy_rtx (tmp), 1, OPTAB_DIRECT);
18366 else
18369 * cmpl op0,op1
18370 * sbbl dest,dest
18371 * [notl dest]
18372 * andl cf - ct, dest
18373 * [addl dest, ct]
18375 * Size 8 - 11.
18378 if (cf == 0)
18380 cf = ct;
18381 ct = 0;
18382 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18385 tmp = expand_simple_binop (mode, AND,
18386 copy_rtx (tmp),
18387 gen_int_mode (cf - ct, mode),
18388 copy_rtx (tmp), 1, OPTAB_DIRECT);
18389 if (ct)
18390 tmp = expand_simple_binop (mode, PLUS,
18391 copy_rtx (tmp), GEN_INT (ct),
18392 copy_rtx (tmp), 1, OPTAB_DIRECT);
18395 if (!rtx_equal_p (tmp, out))
18396 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
18398 return true;
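/* Added worked example (annotation, not part of GCC): with the all-ones
   or all-zeros mask from sbb in hand, the selection is branch-free.  For
   dest = mask-set ? cf : ct with ct = 7 and cf = 12, the general form
   (mask & (cf - ct)) + ct gives (-1 & 5) + 7 = 12 when the mask is -1 and
   (0 & 5) + 7 = 7 when it is 0; the diff == 1, cf == -1 and diff == -1
   cases above are just cheaper add/or/not variants of the same idea.  */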
18401 if (diff < 0)
18403 enum machine_mode cmp_mode = GET_MODE (op0);
18405 HOST_WIDE_INT tmp;
18406 tmp = ct, ct = cf, cf = tmp;
18407 diff = -diff;
18409 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18411 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18413 /* We may be reversing an unordered compare to a normal compare, which
18414 is not valid in general (we may convert a non-trapping condition
18415 to a trapping one); however, on i386 we currently emit all
18416 comparisons unordered. */
18417 compare_code = reverse_condition_maybe_unordered (compare_code);
18418 code = reverse_condition_maybe_unordered (code);
18420 else
18422 compare_code = reverse_condition (compare_code);
18423 code = reverse_condition (code);
18427 compare_code = UNKNOWN;
18428 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18429 && CONST_INT_P (op1))
18431 if (op1 == const0_rtx
18432 && (code == LT || code == GE))
18433 compare_code = code;
18434 else if (op1 == constm1_rtx)
18436 if (code == LE)
18437 compare_code = LT;
18438 else if (code == GT)
18439 compare_code = GE;
18443 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18444 if (compare_code != UNKNOWN
18445 && GET_MODE (op0) == GET_MODE (out)
18446 && (cf == -1 || ct == -1))
18448 /* If lea code below could be used, only optimize
18449 if it results in a 2 insn sequence. */
18451 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18452 || diff == 3 || diff == 5 || diff == 9)
18453 || (compare_code == LT && ct == -1)
18454 || (compare_code == GE && cf == -1))
18457 * notl op1 (if necessary)
18458 * sarl $31, op1
18459 * orl cf, op1
18461 if (ct != -1)
18463 cf = ct;
18464 ct = -1;
18465 code = reverse_condition (code);
18468 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18470 out = expand_simple_binop (mode, IOR,
18471 out, GEN_INT (cf),
18472 out, 1, OPTAB_DIRECT);
18473 if (out != operands[0])
18474 emit_move_insn (operands[0], out);
18476 return true;
18481 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18482 || diff == 3 || diff == 5 || diff == 9)
18483 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18484 && (mode != DImode
18485 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18488 * xorl dest,dest
18489 * cmpl op1,op2
18490 * setcc dest
18491 * lea cf(dest*(ct-cf)),dest
18493 * Size 14.
18495 * This also catches the degenerate setcc-only case.
18498 rtx tmp;
18499 int nops;
18501 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18503 nops = 0;
18504 /* On x86_64 the lea instruction operates on Pmode, so we need
18505 to get the arithmetic done in the proper mode to match. */
18506 if (diff == 1)
18507 tmp = copy_rtx (out);
18508 else
18510 rtx out1;
18511 out1 = copy_rtx (out);
18512 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18513 nops++;
18514 if (diff & 1)
18516 tmp = gen_rtx_PLUS (mode, tmp, out1);
18517 nops++;
18520 if (cf != 0)
18522 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18523 nops++;
18525 if (!rtx_equal_p (tmp, out))
18527 if (nops == 1)
18528 out = force_operand (tmp, copy_rtx (out));
18529 else
18530 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18532 if (!rtx_equal_p (out, operands[0]))
18533 emit_move_insn (operands[0], copy_rtx (out));
18535 return true;
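/* Added worked example (annotation, not part of GCC): a concrete instance
   of the setcc + lea selection above.  With ct = 9 and cf = 5, diff = 4
   is a valid lea scale, so after setcc the lea computes cf + dest * diff:
   5 + 1*4 = 9 when the condition holds and 5 + 0*4 = 5 when it does not,
   with no branch.  */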
18539 * General case: Jumpful:
18540 * xorl dest,dest cmpl op1, op2
18541 * cmpl op1, op2 movl ct, dest
18542 * setcc dest jcc 1f
18543 * decl dest movl cf, dest
18544 * andl (cf-ct),dest 1:
18545 * addl ct,dest
18547 * Size 20. Size 14.
18549 * This is reasonably steep, but branch mispredict costs are
18550 * high on modern cpus, so consider failing only if optimizing
18551 * for space.
18554 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18555 && BRANCH_COST (optimize_insn_for_speed_p (),
18556 false) >= 2)
18558 if (cf == 0)
18560 enum machine_mode cmp_mode = GET_MODE (op0);
18562 cf = ct;
18563 ct = 0;
18565 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18567 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18569 /* We may be reversing an unordered compare to a normal compare,
18570 which is not valid in general (we may convert a non-trapping
18571 condition to a trapping one); however, on i386 we currently
18572 emit all comparisons unordered. */
18573 code = reverse_condition_maybe_unordered (code);
18575 else
18577 code = reverse_condition (code);
18578 if (compare_code != UNKNOWN)
18579 compare_code = reverse_condition (compare_code);
18583 if (compare_code != UNKNOWN)
18585 /* notl op1 (if needed)
18586 sarl $31, op1
18587 andl (cf-ct), op1
18588 addl ct, op1
18590 For x < 0 (resp. x <= -1) there will be no notl,
18591 so if possible swap the constants to get rid of the
18592 complement.
18593 True/false will be -1/0 while code below (store flag
18594 followed by decrement) is 0/-1, so the constants need
18595 to be exchanged once more. */
18597 if (compare_code == GE || !cf)
18599 code = reverse_condition (code);
18600 compare_code = LT;
18602 else
18604 HOST_WIDE_INT tmp = cf;
18605 cf = ct;
18606 ct = tmp;
18609 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18611 else
18613 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18615 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18616 constm1_rtx,
18617 copy_rtx (out), 1, OPTAB_DIRECT);
18620 out = expand_simple_binop (mode, AND, copy_rtx (out),
18621 gen_int_mode (cf - ct, mode),
18622 copy_rtx (out), 1, OPTAB_DIRECT);
18623 if (ct)
18624 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18625 copy_rtx (out), 1, OPTAB_DIRECT);
18626 if (!rtx_equal_p (out, operands[0]))
18627 emit_move_insn (operands[0], copy_rtx (out));
18629 return true;
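/* Added worked example (annotation, not part of GCC): a concrete instance
   of the general setcc/dec/and/add sequence above.  With ct = 3 and
   cf = 10, setcc leaves 1 or 0, the decrement turns that into 0 or -1,
   and (mask & (cf - ct)) + ct yields (0 & 7) + 3 = 3 when the condition
   holds and (-1 & 7) + 3 = 10 when it does not.  */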
18633 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18637 /* Try a few more things with specific constants and a variable. */
18637 optab op;
18638 rtx var, orig_out, out, tmp;
18640 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18641 return false;
18643 /* If one of the two operands is an interesting constant, load a
18644 constant with the above and mask it in with a logical operation. */
18646 if (CONST_INT_P (operands[2]))
18648 var = operands[3];
18649 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18650 operands[3] = constm1_rtx, op = and_optab;
18651 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18652 operands[3] = const0_rtx, op = ior_optab;
18653 else
18654 return false;
18656 else if (CONST_INT_P (operands[3]))
18658 var = operands[2];
18659 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18660 operands[2] = constm1_rtx, op = and_optab;
18661 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
18662 operands[2] = const0_rtx, op = ior_optab;
18663 else
18664 return false;
18666 else
18667 return false;
18669 orig_out = operands[0];
18670 tmp = gen_reg_rtx (mode);
18671 operands[0] = tmp;
18673 /* Recurse to get the constant loaded. */
18674 if (ix86_expand_int_movcc (operands) == 0)
18675 return false;
18677 /* Mask in the interesting variable. */
18678 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18679 OPTAB_WIDEN);
18680 if (!rtx_equal_p (out, orig_out))
18681 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18683 return true;
18687 * For comparison with above,
18689 * movl cf,dest
18690 * movl ct,tmp
18691 * cmpl op1,op2
18692 * cmovcc tmp,dest
18694 * Size 15.
18697 if (! nonimmediate_operand (operands[2], mode))
18698 operands[2] = force_reg (mode, operands[2]);
18699 if (! nonimmediate_operand (operands[3], mode))
18700 operands[3] = force_reg (mode, operands[3]);
18702 if (! register_operand (operands[2], VOIDmode)
18703 && (mode == QImode
18704 || ! register_operand (operands[3], VOIDmode)))
18705 operands[2] = force_reg (mode, operands[2]);
18707 if (mode == QImode
18708 && ! register_operand (operands[3], VOIDmode))
18709 operands[3] = force_reg (mode, operands[3]);
18711 emit_insn (compare_seq);
18712 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18713 gen_rtx_IF_THEN_ELSE (mode,
18714 compare_op, operands[2],
18715 operands[3])));
18716 return true;
18719 /* Swap, force into registers, or otherwise massage the two operands
18720 to an sse comparison with a mask result. Thus we differ a bit from
18721 ix86_prepare_fp_compare_args which expects to produce a flags result.
18723 The DEST operand exists to help determine whether to commute commutative
18724 operators. The POP0/POP1 operands are updated in place. The new
18725 comparison code is returned, or UNKNOWN if not implementable. */
18727 static enum rtx_code
18728 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18729 rtx *pop0, rtx *pop1)
18731 rtx tmp;
18733 switch (code)
18735 case LTGT:
18736 case UNEQ:
18737 /* We have no LTGT as an operator. We could implement it with
18738 NE & ORDERED, but this requires an extra temporary. It's
18739 not clear that it's worth it. */
18740 return UNKNOWN;
18742 case LT:
18743 case LE:
18744 case UNGT:
18745 case UNGE:
18746 /* These are supported directly. */
18747 break;
18749 case EQ:
18750 case NE:
18751 case UNORDERED:
18752 case ORDERED:
18753 /* For commutative operators, try to canonicalize the destination
18754 operand to be first in the comparison - this helps reload to
18755 avoid extra moves. */
18756 if (!dest || !rtx_equal_p (dest, *pop1))
18757 break;
18758 /* FALLTHRU */
18760 case GE:
18761 case GT:
18762 case UNLE:
18763 case UNLT:
18764 /* These are not supported directly. Swap the comparison operands
18765 to transform into something that is supported. */
18766 tmp = *pop0;
18767 *pop0 = *pop1;
18768 *pop1 = tmp;
18769 code = swap_condition (code);
18770 break;
18772 default:
18773 gcc_unreachable ();
18776 return code;
18779 /* Detect conditional moves that exactly match min/max operational
18780 semantics. Note that this is IEEE safe, as long as we don't
18781 interchange the operands.
18783 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18784 and TRUE if the operation is successful and instructions are emitted. */
18786 static bool
18787 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18788 rtx cmp_op1, rtx if_true, rtx if_false)
18790 enum machine_mode mode;
18791 bool is_min;
18792 rtx tmp;
18794 if (code == LT)
18796 else if (code == UNGE)
18798 tmp = if_true;
18799 if_true = if_false;
18800 if_false = tmp;
18802 else
18803 return false;
18805 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18806 is_min = true;
18807 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18808 is_min = false;
18809 else
18810 return false;
18812 mode = GET_MODE (dest);
18814 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18815 but MODE may be a vector mode and thus not appropriate. */
18816 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18818 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18819 rtvec v;
18821 if_true = force_reg (mode, if_true);
18822 v = gen_rtvec (2, if_true, if_false);
18823 tmp = gen_rtx_UNSPEC (mode, v, u);
18825 else
18827 code = is_min ? SMIN : SMAX;
18828 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18831 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
18832 return true;
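/* Added illustrative sketch (annotation, not part of GCC): the source
   pattern the min/max detection above matches.  (x < y ? x : y) has the
   operand order of minss/minsd (the second operand is returned when the
   compare is unordered), so it can be emitted directly as long as the
   operands are not interchanged.  The sketch_fmin_pattern helper is
   hypothetical.  */

static double
sketch_fmin_pattern (double x, double y)
{
  return x < y ? x : y;	/* code == LT, if_true == x, if_false == y.  */
}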
18835 /* Expand an sse vector comparison. Return the register with the result. */
18837 static rtx
18838 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18839 rtx op_true, rtx op_false)
18841 enum machine_mode mode = GET_MODE (dest);
18842 rtx x;
18844 cmp_op0 = force_reg (mode, cmp_op0);
18845 if (!nonimmediate_operand (cmp_op1, mode))
18846 cmp_op1 = force_reg (mode, cmp_op1);
18848 if (optimize
18849 || reg_overlap_mentioned_p (dest, op_true)
18850 || reg_overlap_mentioned_p (dest, op_false))
18851 dest = gen_reg_rtx (mode);
18853 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18854 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18856 return dest;
18859 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18860 operations. This is used for both scalar and vector conditional moves. */
18862 static void
18863 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18865 enum machine_mode mode = GET_MODE (dest);
18866 rtx t2, t3, x;
18868 if (op_false == CONST0_RTX (mode))
18870 op_true = force_reg (mode, op_true);
18871 x = gen_rtx_AND (mode, cmp, op_true);
18872 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18874 else if (op_true == CONST0_RTX (mode))
18876 op_false = force_reg (mode, op_false);
18877 x = gen_rtx_NOT (mode, cmp);
18878 x = gen_rtx_AND (mode, x, op_false);
18879 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18881 else if (TARGET_XOP)
18883 rtx pcmov = gen_rtx_SET (mode, dest,
18884 gen_rtx_IF_THEN_ELSE (mode, cmp,
18885 op_true,
18886 op_false));
18887 emit_insn (pcmov);
18889 else
18891 op_true = force_reg (mode, op_true);
18892 op_false = force_reg (mode, op_false);
18894 t2 = gen_reg_rtx (mode);
18895 if (optimize)
18896 t3 = gen_reg_rtx (mode);
18897 else
18898 t3 = dest;
18900 x = gen_rtx_AND (mode, op_true, cmp);
18901 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18903 x = gen_rtx_NOT (mode, cmp);
18904 x = gen_rtx_AND (mode, x, op_false);
18905 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18907 x = gen_rtx_IOR (mode, t3, t2);
18908 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
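/* Added illustrative sketch (annotation, not part of GCC): the mask-select
   logic emitted above when neither arm is zero and XOP's pcmov is not
   available.  CMP is an all-ones or all-zeros mask per element.  The
   sketch_mask_select helper is hypothetical.  */

static unsigned int
sketch_mask_select (unsigned int cmp, unsigned int op_true,
		    unsigned int op_false)
{
  return (cmp & op_true) | (~cmp & op_false);
}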
18912 /* Expand a floating-point conditional move. Return true if successful. */
18914 bool
18915 ix86_expand_fp_movcc (rtx operands[])
18917 enum machine_mode mode = GET_MODE (operands[0]);
18918 enum rtx_code code = GET_CODE (operands[1]);
18919 rtx tmp, compare_op;
18920 rtx op0 = XEXP (operands[1], 0);
18921 rtx op1 = XEXP (operands[1], 1);
18923 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18925 enum machine_mode cmode;
18927 /* Since we've no cmove for sse registers, don't force bad register
18928 allocation just to gain access to it. Deny movcc when the
18929 comparison mode doesn't match the move mode. */
18930 cmode = GET_MODE (op0);
18931 if (cmode == VOIDmode)
18932 cmode = GET_MODE (op1);
18933 if (cmode != mode)
18934 return false;
18936 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18937 if (code == UNKNOWN)
18938 return false;
18940 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18941 operands[2], operands[3]))
18942 return true;
18944 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18945 operands[2], operands[3]);
18946 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18947 return true;
18950 /* The floating point conditional move instructions don't directly
18951 support conditions resulting from a signed integer comparison. */
18953 compare_op = ix86_expand_compare (code, op0, op1);
18954 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18956 tmp = gen_reg_rtx (QImode);
18957 ix86_expand_setcc (tmp, code, op0, op1);
18959 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18962 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18963 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18964 operands[2], operands[3])));
18966 return true;
18969 /* Expand a floating-point vector conditional move; a vcond operation
18970 rather than a movcc operation. */
18972 bool
18973 ix86_expand_fp_vcond (rtx operands[])
18975 enum rtx_code code = GET_CODE (operands[3]);
18976 rtx cmp;
18978 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18979 &operands[4], &operands[5]);
18980 if (code == UNKNOWN)
18981 return false;
18983 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18984 operands[5], operands[1], operands[2]))
18985 return true;
18987 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18988 operands[1], operands[2]);
18989 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18990 return true;
18993 /* Expand a signed/unsigned integral vector conditional move. */
18995 bool
18996 ix86_expand_int_vcond (rtx operands[])
18998 enum machine_mode mode = GET_MODE (operands[0]);
18999 enum rtx_code code = GET_CODE (operands[3]);
19000 bool negate = false;
19001 rtx x, cop0, cop1;
19003 cop0 = operands[4];
19004 cop1 = operands[5];
19006 /* XOP supports all of the comparisons on all vector int types. */
19007 if (!TARGET_XOP)
19009 /* Canonicalize the comparison to EQ, GT, GTU. */
19010 switch (code)
19012 case EQ:
19013 case GT:
19014 case GTU:
19015 break;
19017 case NE:
19018 case LE:
19019 case LEU:
19020 code = reverse_condition (code);
19021 negate = true;
19022 break;
19024 case GE:
19025 case GEU:
19026 code = reverse_condition (code);
19027 negate = true;
19028 /* FALLTHRU */
19030 case LT:
19031 case LTU:
19032 code = swap_condition (code);
19033 x = cop0, cop0 = cop1, cop1 = x;
19034 break;
19036 default:
19037 gcc_unreachable ();
19040 /* Only SSE4.1/SSE4.2 supports V2DImode. */
19041 if (mode == V2DImode)
19043 switch (code)
19045 case EQ:
19046 /* SSE4.1 supports EQ. */
19047 if (!TARGET_SSE4_1)
19048 return false;
19049 break;
19051 case GT:
19052 case GTU:
19053 /* SSE4.2 supports GT/GTU. */
19054 if (!TARGET_SSE4_2)
19055 return false;
19056 break;
19058 default:
19059 gcc_unreachable ();
19063 /* Unsigned parallel compare is not supported by the hardware.
19064 Play some tricks to turn this into a signed comparison
19065 against 0. */
19066 if (code == GTU)
19068 cop0 = force_reg (mode, cop0);
19070 switch (mode)
19072 case V4SImode:
19073 case V2DImode:
19075 rtx t1, t2, mask;
19076 rtx (*gen_sub3) (rtx, rtx, rtx);
19078 /* Subtract (-(INT MAX) - 1) from both operands to make
19079 them signed. */
19080 mask = ix86_build_signbit_mask (mode, true, false);
19081 gen_sub3 = (mode == V4SImode
19082 ? gen_subv4si3 : gen_subv2di3);
19083 t1 = gen_reg_rtx (mode);
19084 emit_insn (gen_sub3 (t1, cop0, mask));
19086 t2 = gen_reg_rtx (mode);
19087 emit_insn (gen_sub3 (t2, cop1, mask));
19089 cop0 = t1;
19090 cop1 = t2;
19091 code = GT;
19093 break;
19095 case V16QImode:
19096 case V8HImode:
19097 /* Perform a parallel unsigned saturating subtraction. */
19098 x = gen_reg_rtx (mode);
19099 emit_insn (gen_rtx_SET (VOIDmode, x,
19100 gen_rtx_US_MINUS (mode, cop0, cop1)));
19102 cop0 = x;
19103 cop1 = CONST0_RTX (mode);
19104 code = EQ;
19105 negate = !negate;
19106 break;
19108 default:
19109 gcc_unreachable ();
19114 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
19115 operands[1+negate], operands[2-negate]);
19117 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
19118 operands[2-negate]);
19119 return true;
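/* Added illustrative sketch (annotation, not part of GCC): the scalar
   identities behind the GTU workarounds above.  Flipping the sign bit of
   both operands (i.e. subtracting -(INT MAX) - 1) turns an unsigned
   compare into a signed one, assuming the usual two's complement
   conversion; for the narrow modes, an unsigned saturating subtraction is
   nonzero exactly when a > b.  The sketch_* helpers are hypothetical.  */

static int
sketch_gtu_via_bias (unsigned int a, unsigned int b)
{
  return (int) (a ^ 0x80000000u) > (int) (b ^ 0x80000000u);	/* == (a > b)  */
}

static int
sketch_gtu_via_ussub (unsigned char a, unsigned char b)
{
  unsigned char d = a >= b ? a - b : 0;	/* psubusb-style saturating a - b.  */
  return d != 0;			/* == (a > b)  */
}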
19122 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
19123 true if we should do zero extension, else sign extension. HIGH_P is
19124 true if we want the N/2 high elements, else the low elements. */
19126 void
19127 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
19129 enum machine_mode imode = GET_MODE (operands[1]);
19130 rtx tmp, dest;
19132 if (TARGET_SSE4_1)
19134 rtx (*unpack)(rtx, rtx);
19136 switch (imode)
19138 case V16QImode:
19139 if (unsigned_p)
19140 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
19141 else
19142 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
19143 break;
19144 case V8HImode:
19145 if (unsigned_p)
19146 unpack = gen_sse4_1_zero_extendv4hiv4si2;
19147 else
19148 unpack = gen_sse4_1_sign_extendv4hiv4si2;
19149 break;
19150 case V4SImode:
19151 if (unsigned_p)
19152 unpack = gen_sse4_1_zero_extendv2siv2di2;
19153 else
19154 unpack = gen_sse4_1_sign_extendv2siv2di2;
19155 break;
19156 default:
19157 gcc_unreachable ();
19160 if (high_p)
19162 /* Shift higher 8 bytes to lower 8 bytes. */
19163 tmp = gen_reg_rtx (imode);
19164 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
19165 gen_lowpart (V1TImode, operands[1]),
19166 GEN_INT (64)));
19168 else
19169 tmp = operands[1];
19171 emit_insn (unpack (operands[0], tmp));
19173 else
19175 rtx (*unpack)(rtx, rtx, rtx);
19177 switch (imode)
19179 case V16QImode:
19180 if (high_p)
19181 unpack = gen_vec_interleave_highv16qi;
19182 else
19183 unpack = gen_vec_interleave_lowv16qi;
19184 break;
19185 case V8HImode:
19186 if (high_p)
19187 unpack = gen_vec_interleave_highv8hi;
19188 else
19189 unpack = gen_vec_interleave_lowv8hi;
19190 break;
19191 case V4SImode:
19192 if (high_p)
19193 unpack = gen_vec_interleave_highv4si;
19194 else
19195 unpack = gen_vec_interleave_lowv4si;
19196 break;
19197 default:
19198 gcc_unreachable ();
19201 dest = gen_lowpart (imode, operands[0]);
19203 if (unsigned_p)
19204 tmp = force_reg (imode, CONST0_RTX (imode));
19205 else
19206 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
19207 operands[1], pc_rtx, pc_rtx);
19209 emit_insn (unpack (dest, operands[1], tmp));
19213 /* Expand conditional increment or decrement using adc/sbb instructions.
19214 The default case using setcc followed by the conditional move can be
19215 done by generic code. */
19216 bool
19217 ix86_expand_int_addcc (rtx operands[])
19219 enum rtx_code code = GET_CODE (operands[1]);
19220 rtx flags;
19221 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
19222 rtx compare_op;
19223 rtx val = const0_rtx;
19224 bool fpcmp = false;
19225 enum machine_mode mode;
19226 rtx op0 = XEXP (operands[1], 0);
19227 rtx op1 = XEXP (operands[1], 1);
19229 if (operands[3] != const1_rtx
19230 && operands[3] != constm1_rtx)
19231 return false;
19232 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
19233 return false;
19234 code = GET_CODE (compare_op);
19236 flags = XEXP (compare_op, 0);
19238 if (GET_MODE (flags) == CCFPmode
19239 || GET_MODE (flags) == CCFPUmode)
19241 fpcmp = true;
19242 code = ix86_fp_compare_code_to_integer (code);
19245 if (code != LTU)
19247 val = constm1_rtx;
19248 if (fpcmp)
19249 PUT_CODE (compare_op,
19250 reverse_condition_maybe_unordered
19251 (GET_CODE (compare_op)));
19252 else
19253 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
19256 mode = GET_MODE (operands[0]);
19258 /* Construct either adc or sbb insn. */
19259 if ((code == LTU) == (operands[3] == constm1_rtx))
19261 switch (mode)
19263 case QImode:
19264 insn = gen_subqi3_carry;
19265 break;
19266 case HImode:
19267 insn = gen_subhi3_carry;
19268 break;
19269 case SImode:
19270 insn = gen_subsi3_carry;
19271 break;
19272 case DImode:
19273 insn = gen_subdi3_carry;
19274 break;
19275 default:
19276 gcc_unreachable ();
19279 else
19281 switch (mode)
19283 case QImode:
19284 insn = gen_addqi3_carry;
19285 break;
19286 case HImode:
19287 insn = gen_addhi3_carry;
19288 break;
19289 case SImode:
19290 insn = gen_addsi3_carry;
19291 break;
19292 case DImode:
19293 insn = gen_adddi3_carry;
19294 break;
19295 default:
19296 gcc_unreachable ();
19299 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
19301 return true;
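/* Added illustrative sketch (annotation, not part of GCC): the conditional
   increment the adc form above implements; the sbb form is the analogous
   conditional decrement.  The compare leaves its result in the carry flag,
   which the add-with-carry then folds directly into the sum.  The
   sketch_addcc helper is hypothetical.  */

static unsigned int
sketch_addcc (unsigned int x, unsigned int a, unsigned int b)
{
  return x + (a < b);	/* carry from the compare feeds an adc with immediate 0.  */
}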
19305 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
19306 but works for floating point parameters and non-offsettable memories.
19307 For pushes, it returns just stack offsets; the values will be saved
19308 in the right order. At most four parts are generated. */
19310 static int
19311 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
19313 int size;
19315 if (!TARGET_64BIT)
19316 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
19317 else
19318 size = (GET_MODE_SIZE (mode) + 4) / 8;
19320 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
19321 gcc_assert (size >= 2 && size <= 4);
19323 /* Optimize constant pool reference to immediates. This is used by fp
19324 moves, that force all constants to memory to allow combining. */
19325 if (MEM_P (operand) && MEM_READONLY_P (operand))
19327 rtx tmp = maybe_get_pool_constant (operand);
19328 if (tmp)
19329 operand = tmp;
19332 if (MEM_P (operand) && !offsettable_memref_p (operand))
19334 /* The only non-offsettable memories we handle are pushes. */
19335 int ok = push_operand (operand, VOIDmode);
19337 gcc_assert (ok);
19339 operand = copy_rtx (operand);
19340 PUT_MODE (operand, Pmode);
19341 parts[0] = parts[1] = parts[2] = parts[3] = operand;
19342 return size;
19345 if (GET_CODE (operand) == CONST_VECTOR)
19347 enum machine_mode imode = int_mode_for_mode (mode);
19348 /* Caution: if we looked through a constant pool memory above,
19349 the operand may actually have a different mode now. That's
19350 ok, since we want to pun this all the way back to an integer. */
19351 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
19352 gcc_assert (operand != NULL);
19353 mode = imode;
19356 if (!TARGET_64BIT)
19358 if (mode == DImode)
19359 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19360 else
19362 int i;
19364 if (REG_P (operand))
19366 gcc_assert (reload_completed);
19367 for (i = 0; i < size; i++)
19368 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
19370 else if (offsettable_memref_p (operand))
19372 operand = adjust_address (operand, SImode, 0);
19373 parts[0] = operand;
19374 for (i = 1; i < size; i++)
19375 parts[i] = adjust_address (operand, SImode, 4 * i);
19377 else if (GET_CODE (operand) == CONST_DOUBLE)
19379 REAL_VALUE_TYPE r;
19380 long l[4];
19382 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19383 switch (mode)
19385 case TFmode:
19386 real_to_target (l, &r, mode);
19387 parts[3] = gen_int_mode (l[3], SImode);
19388 parts[2] = gen_int_mode (l[2], SImode);
19389 break;
19390 case XFmode:
19391 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
19392 parts[2] = gen_int_mode (l[2], SImode);
19393 break;
19394 case DFmode:
19395 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
19396 break;
19397 default:
19398 gcc_unreachable ();
19400 parts[1] = gen_int_mode (l[1], SImode);
19401 parts[0] = gen_int_mode (l[0], SImode);
19403 else
19404 gcc_unreachable ();
19407 else
19409 if (mode == TImode)
19410 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19411 if (mode == XFmode || mode == TFmode)
19413 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
19414 if (REG_P (operand))
19416 gcc_assert (reload_completed);
19417 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
19418 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
19420 else if (offsettable_memref_p (operand))
19422 operand = adjust_address (operand, DImode, 0);
19423 parts[0] = operand;
19424 parts[1] = adjust_address (operand, upper_mode, 8);
19426 else if (GET_CODE (operand) == CONST_DOUBLE)
19428 REAL_VALUE_TYPE r;
19429 long l[4];
19431 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19432 real_to_target (l, &r, mode);
19434 /* Do not use shift by 32 to avoid warning on 32bit systems. */
19435 if (HOST_BITS_PER_WIDE_INT >= 64)
19436 parts[0]
19437 = gen_int_mode
19438 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
19439 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
19440 DImode);
19441 else
19442 parts[0] = immed_double_const (l[0], l[1], DImode);
19444 if (upper_mode == SImode)
19445 parts[1] = gen_int_mode (l[2], SImode);
19446 else if (HOST_BITS_PER_WIDE_INT >= 64)
19447 parts[1]
19448 = gen_int_mode
19449 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
19450 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
19451 DImode);
19452 else
19453 parts[1] = immed_double_const (l[2], l[3], DImode);
19455 else
19456 gcc_unreachable ();
19460 return size;
19463 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
19464 All required insns are emitted here; callers never need to fall back
19465 to normal moves. Operands 2-4 contain the input values
19466 int the correct order; operands 5-7 contain the output values. */
19468 void
19469 ix86_split_long_move (rtx operands[])
19471 rtx part[2][4];
19472 int nparts, i, j;
19473 int push = 0;
19474 int collisions = 0;
19475 enum machine_mode mode = GET_MODE (operands[0]);
19476 bool collisionparts[4];
19478 /* The DFmode expanders may ask us to move a double.
19479 For a 64-bit target this is a single move. By hiding that fact
19480 here we simplify the i386.md splitters. */
19481 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
19483 /* Optimize constant pool references into immediates. This is used by
19484 fp moves, which force all constants to memory to allow combining. */
19486 if (MEM_P (operands[1])
19487 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19488 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19489 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19490 if (push_operand (operands[0], VOIDmode))
19492 operands[0] = copy_rtx (operands[0]);
19493 PUT_MODE (operands[0], Pmode);
19495 else
19496 operands[0] = gen_lowpart (DImode, operands[0]);
19497 operands[1] = gen_lowpart (DImode, operands[1]);
19498 emit_move_insn (operands[0], operands[1]);
19499 return;
19502 /* The only non-offsettable memory we handle is push. */
19503 if (push_operand (operands[0], VOIDmode))
19504 push = 1;
19505 else
19506 gcc_assert (!MEM_P (operands[0])
19507 || offsettable_memref_p (operands[0]));
19509 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19510 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19512 /* When emitting a push, be careful with source operands on the stack. */
19513 if (push && MEM_P (operands[1])
19514 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19516 rtx src_base = XEXP (part[1][nparts - 1], 0);
19518 /* Compensate for the stack decrement by 4. */
19519 if (!TARGET_64BIT && nparts == 3
19520 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19521 src_base = plus_constant (src_base, 4);
19523 /* src_base refers to the stack pointer and is
19524 automatically decreased by emitted push. */
19525 for (i = 0; i < nparts; i++)
19526 part[1][i] = change_address (part[1][i],
19527 GET_MODE (part[1][i]), src_base);
19530 /* We need to do copy in the right order in case an address register
19531 of the source overlaps the destination. */
19532 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19534 rtx tmp;
19536 for (i = 0; i < nparts; i++)
19538 collisionparts[i]
19539 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19540 if (collisionparts[i])
19541 collisions++;
19544 /* Collision in the middle part can be handled by reordering. */
19545 if (collisions == 1 && nparts == 3 && collisionparts [1])
19547 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19548 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19550 else if (collisions == 1
19551 && nparts == 4
19552 && (collisionparts [1] || collisionparts [2]))
19554 if (collisionparts [1])
19556 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19557 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19559 else
19561 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19562 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19566 /* If there are more collisions, we can't handle it by reordering.
19567 Do an lea to the last part and use only one colliding move. */
19568 else if (collisions > 1)
19570 rtx base;
19572 collisions = 1;
19574 base = part[0][nparts - 1];
19576 /* Handle the case when the last part isn't valid for lea.
19577 Happens in 64-bit mode storing the 12-byte XFmode. */
19578 if (GET_MODE (base) != Pmode)
19579 base = gen_rtx_REG (Pmode, REGNO (base));
19581 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19582 part[1][0] = replace_equiv_address (part[1][0], base);
19583 for (i = 1; i < nparts; i++)
19585 tmp = plus_constant (base, UNITS_PER_WORD * i);
19586 part[1][i] = replace_equiv_address (part[1][i], tmp);
19591 if (push)
19593 if (!TARGET_64BIT)
19595 if (nparts == 3)
19597 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19598 emit_insn (gen_addsi3 (stack_pointer_rtx,
19599 stack_pointer_rtx, GEN_INT (-4)));
19600 emit_move_insn (part[0][2], part[1][2]);
19602 else if (nparts == 4)
19604 emit_move_insn (part[0][3], part[1][3]);
19605 emit_move_insn (part[0][2], part[1][2]);
19608 else
19610 /* In 64-bit mode we don't have a 32-bit push available. If the operand
19611 is a register, that is OK - we will just use the larger counterpart.
19612 We also retype memory - this comes from an attempt to avoid a REX
19613 prefix when moving the second half of a TFmode value. */
19614 if (GET_MODE (part[1][1]) == SImode)
19616 switch (GET_CODE (part[1][1]))
19618 case MEM:
19619 part[1][1] = adjust_address (part[1][1], DImode, 0);
19620 break;
19622 case REG:
19623 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19624 break;
19626 default:
19627 gcc_unreachable ();
19630 if (GET_MODE (part[1][0]) == SImode)
19631 part[1][0] = part[1][1];
19634 emit_move_insn (part[0][1], part[1][1]);
19635 emit_move_insn (part[0][0], part[1][0]);
19636 return;
19639 /* Choose correct order to not overwrite the source before it is copied. */
19640 if ((REG_P (part[0][0])
19641 && REG_P (part[1][1])
19642 && (REGNO (part[0][0]) == REGNO (part[1][1])
19643 || (nparts == 3
19644 && REGNO (part[0][0]) == REGNO (part[1][2]))
19645 || (nparts == 4
19646 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19647 || (collisions > 0
19648 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19650 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19652 operands[2 + i] = part[0][j];
19653 operands[6 + i] = part[1][j];
19656 else
19658 for (i = 0; i < nparts; i++)
19660 operands[2 + i] = part[0][i];
19661 operands[6 + i] = part[1][i];
19665 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19666 if (optimize_insn_for_size_p ())
19668 for (j = 0; j < nparts - 1; j++)
19669 if (CONST_INT_P (operands[6 + j])
19670 && operands[6 + j] != const0_rtx
19671 && REG_P (operands[2 + j]))
19672 for (i = j; i < nparts - 1; i++)
19673 if (CONST_INT_P (operands[7 + i])
19674 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19675 operands[7 + i] = operands[2 + j];
19678 for (i = 0; i < nparts; i++)
19679 emit_move_insn (operands[2 + i], operands[6 + i]);
19681 return;
19684 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19685 left shift by a constant, either using a single shift or
19686 a sequence of add instructions. */
19688 static void
19689 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19691 rtx (*insn)(rtx, rtx, rtx);
19693 if (count == 1
19694 || (count * ix86_cost->add <= ix86_cost->shift_const
19695 && !optimize_insn_for_size_p ()))
19697 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19698 while (count-- > 0)
19699 emit_insn (insn (operand, operand, operand));
19701 else
19703 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19704 emit_insn (insn (operand, operand, GEN_INT (count)));
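/* As an illustrative sketch (not tied to any particular cost table): with
   a cheap add, a constant shift such as  x <<= 2  may be emitted as

       addl %eax, %eax
       addl %eax, %eax

   whereas when the adds are not cheaper than a constant shift, or when
   optimizing for size with count > 1, the single-instruction form

       sall $2, %eax

   is used instead.  Which form wins is decided purely by the
   ix86_cost->add vs. ix86_cost->shift_const comparison above.  */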
19708 void
19709 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19711 rtx (*gen_ashl3)(rtx, rtx, rtx);
19712 rtx (*gen_shld)(rtx, rtx, rtx);
19713 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19715 rtx low[2], high[2];
19716 int count;
19718 if (CONST_INT_P (operands[2]))
19720 split_double_mode (mode, operands, 2, low, high);
19721 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19723 if (count >= half_width)
19725 emit_move_insn (high[0], low[1]);
19726 emit_move_insn (low[0], const0_rtx);
19728 if (count > half_width)
19729 ix86_expand_ashl_const (high[0], count - half_width, mode);
19731 else
19733 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19735 if (!rtx_equal_p (operands[0], operands[1]))
19736 emit_move_insn (operands[0], operands[1]);
19738 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19739 ix86_expand_ashl_const (low[0], count, mode);
19741 return;
19744 split_double_mode (mode, operands, 1, low, high);
19746 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19748 if (operands[1] == const1_rtx)
19750 /* Assuming we've chosen QImode-capable registers, 1 << N
19751 can be done with two 32/64-bit shifts, no branches, no cmoves. */
19752 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19754 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19756 ix86_expand_clear (low[0]);
19757 ix86_expand_clear (high[0]);
19758 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19760 d = gen_lowpart (QImode, low[0]);
19761 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19762 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19763 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19765 d = gen_lowpart (QImode, high[0]);
19766 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19767 s = gen_rtx_NE (QImode, flags, const0_rtx);
19768 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19771 /* Otherwise, we can get the same results by manually performing
19772 a bit extract operation on bit 5/6, and then performing the two
19773 shifts. The two methods of getting 0/1 into low/high are exactly
19774 the same size. Avoiding the shift in the bit extract case helps
19775 pentium4 a bit; no one else seems to care much either way. */
19776 else
19778 enum machine_mode half_mode;
19779 rtx (*gen_lshr3)(rtx, rtx, rtx);
19780 rtx (*gen_and3)(rtx, rtx, rtx);
19781 rtx (*gen_xor3)(rtx, rtx, rtx);
19782 HOST_WIDE_INT bits;
19783 rtx x;
19785 if (mode == DImode)
19787 half_mode = SImode;
19788 gen_lshr3 = gen_lshrsi3;
19789 gen_and3 = gen_andsi3;
19790 gen_xor3 = gen_xorsi3;
19791 bits = 5;
19793 else
19795 half_mode = DImode;
19796 gen_lshr3 = gen_lshrdi3;
19797 gen_and3 = gen_anddi3;
19798 gen_xor3 = gen_xordi3;
19799 bits = 6;
19802 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19803 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19804 else
19805 x = gen_lowpart (half_mode, operands[2]);
19806 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19808 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19809 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19810 emit_move_insn (low[0], high[0]);
19811 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19814 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19815 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
19816 return;
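/* Worked example of the branch-free 1 << N expansion above, assuming a
   32-bit target, a DImode result and N in %ecx (register choice is only
   illustrative):

       xorl  %eax, %eax          # low  = 0
       xorl  %edx, %edx          # high = 0
       testb $32, %cl            # is N >= 32?
       sete  %al                 # low  = (N < 32)
       setne %dl                 # high = (N >= 32)
       sall  %cl, %eax           # shift by N & 31
       sall  %cl, %edx

   Exactly one of low/high holds the single set bit afterwards, so the
   result is 1 << N without branches or cmoves.  */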
19819 if (operands[1] == constm1_rtx)
19821 /* For -1 << N, we can avoid the shld instruction, because we
19822 know that we're shifting 0...31/63 ones into a -1. */
19823 emit_move_insn (low[0], constm1_rtx);
19824 if (optimize_insn_for_size_p ())
19825 emit_move_insn (high[0], low[0]);
19826 else
19827 emit_move_insn (high[0], constm1_rtx);
19829 else
19831 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19833 if (!rtx_equal_p (operands[0], operands[1]))
19834 emit_move_insn (operands[0], operands[1]);
19836 split_double_mode (mode, operands, 1, low, high);
19837 emit_insn (gen_shld (high[0], low[0], operands[2]));
19840 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19842 if (TARGET_CMOVE && scratch)
19844 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19845 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19847 ix86_expand_clear (scratch);
19848 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19850 else
19852 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19853 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19855 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
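/* Illustrative shape of the general variable-count path above, for a
   DImode shift on a 32-bit target (register choice is just an example):

       shldl %cl, %eax, %edx   # high = (high << c) | (low >> (32 - c))
       sall  %cl, %eax         # low <<= c            (c = N & 31)

   Since the hardware ignores bit 5 of the count, a fixup follows:
   if (N & 32) { high = low; low = 0; }, implemented either with cmov
   (the *_adj_1 pattern, using the cleared SCRATCH register) or with a
   compare-and-jump (the *_adj_2 pattern).  */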
19859 void
19860 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19862 rtx (*gen_ashr3)(rtx, rtx, rtx)
19863 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19864 rtx (*gen_shrd)(rtx, rtx, rtx);
19865 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19867 rtx low[2], high[2];
19868 int count;
19870 if (CONST_INT_P (operands[2]))
19872 split_double_mode (mode, operands, 2, low, high);
19873 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19875 if (count == GET_MODE_BITSIZE (mode) - 1)
19877 emit_move_insn (high[0], high[1]);
19878 emit_insn (gen_ashr3 (high[0], high[0],
19879 GEN_INT (half_width - 1)));
19880 emit_move_insn (low[0], high[0]);
19883 else if (count >= half_width)
19885 emit_move_insn (low[0], high[1]);
19886 emit_move_insn (high[0], low[0]);
19887 emit_insn (gen_ashr3 (high[0], high[0],
19888 GEN_INT (half_width - 1)));
19890 if (count > half_width)
19891 emit_insn (gen_ashr3 (low[0], low[0],
19892 GEN_INT (count - half_width)));
19894 else
19896 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19898 if (!rtx_equal_p (operands[0], operands[1]))
19899 emit_move_insn (operands[0], operands[1]);
19901 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19902 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19905 else
19907 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19909 if (!rtx_equal_p (operands[0], operands[1]))
19910 emit_move_insn (operands[0], operands[1]);
19912 split_double_mode (mode, operands, 1, low, high);
19914 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19915 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19917 if (TARGET_CMOVE && scratch)
19919 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19920 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19922 emit_move_insn (scratch, high[0]);
19923 emit_insn (gen_ashr3 (scratch, scratch,
19924 GEN_INT (half_width - 1)));
19925 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19926 scratch));
19928 else
19930 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19931 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19933 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19938 void
19939 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19941 rtx (*gen_lshr3)(rtx, rtx, rtx)
19942 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19943 rtx (*gen_shrd)(rtx, rtx, rtx);
19944 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19946 rtx low[2], high[2];
19947 int count;
19949 if (CONST_INT_P (operands[2]))
19951 split_double_mode (mode, operands, 2, low, high);
19952 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19954 if (count >= half_width)
19956 emit_move_insn (low[0], high[1]);
19957 ix86_expand_clear (high[0]);
19959 if (count > half_width)
19960 emit_insn (gen_lshr3 (low[0], low[0],
19961 GEN_INT (count - half_width)));
19963 else
19965 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19967 if (!rtx_equal_p (operands[0], operands[1]))
19968 emit_move_insn (operands[0], operands[1]);
19970 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19971 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19974 else
19976 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19978 if (!rtx_equal_p (operands[0], operands[1]))
19979 emit_move_insn (operands[0], operands[1]);
19981 split_double_mode (mode, operands, 1, low, high);
19983 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19984 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19986 if (TARGET_CMOVE && scratch)
19988 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19989 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19991 ix86_expand_clear (scratch);
19992 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19993 scratch));
19995 else
19997 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19998 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
20000 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
20005 /* Predict just emitted jump instruction to be taken with probability PROB. */
20006 static void
20007 predict_jump (int prob)
20009 rtx insn = get_last_insn ();
20010 gcc_assert (JUMP_P (insn));
20011 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
20014 /* Helper function for the string operations below. Test whether VARIABLE
20015 is aligned to VALUE bytes. If so, jump to the returned label. */
20016 static rtx
20017 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
20019 rtx label = gen_label_rtx ();
20020 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
20021 if (GET_MODE (variable) == DImode)
20022 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
20023 else
20024 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
20025 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
20026 1, label);
20027 if (epilogue)
20028 predict_jump (REG_BR_PROB_BASE * 50 / 100);
20029 else
20030 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20031 return label;
20034 /* Decrease COUNTREG by VALUE. */
20035 static void
20036 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
20038 rtx (*gen_add)(rtx, rtx, rtx)
20039 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
20041 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
20044 /* Zero extend the possibly-SImode EXP to a Pmode register. */
20045 rtx
20046 ix86_zero_extend_to_Pmode (rtx exp)
20048 rtx r;
20049 if (GET_MODE (exp) == VOIDmode)
20050 return force_reg (Pmode, exp);
20051 if (GET_MODE (exp) == Pmode)
20052 return copy_to_mode_reg (Pmode, exp);
20053 r = gen_reg_rtx (Pmode);
20054 emit_insn (gen_zero_extendsidi2 (r, exp));
20055 return r;
20058 /* Divide COUNTREG by SCALE. */
20059 static rtx
20060 scale_counter (rtx countreg, int scale)
20062 rtx sc;
20064 if (scale == 1)
20065 return countreg;
20066 if (CONST_INT_P (countreg))
20067 return GEN_INT (INTVAL (countreg) / scale);
20068 gcc_assert (REG_P (countreg));
20070 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
20071 GEN_INT (exact_log2 (scale)),
20072 NULL, 1, OPTAB_DIRECT);
20073 return sc;
20076 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
20077 DImode for constant loop counts. */
20079 static enum machine_mode
20080 counter_mode (rtx count_exp)
20082 if (GET_MODE (count_exp) != VOIDmode)
20083 return GET_MODE (count_exp);
20084 if (!CONST_INT_P (count_exp))
20085 return Pmode;
20086 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
20087 return DImode;
20088 return SImode;
20091 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
20092 to by SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
20093 the overall size is COUNT, specified in bytes. When SRCPTR is NULL,
20094 output the equivalent loop to set memory to VALUE (supposed to be in MODE).
20096 The size is rounded down to a whole number of chunks moved at once.
20097 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
20100 static void
20101 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
20102 rtx destptr, rtx srcptr, rtx value,
20103 rtx count, enum machine_mode mode, int unroll,
20104 int expected_size)
20106 rtx out_label, top_label, iter, tmp;
20107 enum machine_mode iter_mode = counter_mode (count);
20108 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
20109 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
20110 rtx size;
20111 rtx x_addr;
20112 rtx y_addr;
20113 int i;
20115 top_label = gen_label_rtx ();
20116 out_label = gen_label_rtx ();
20117 iter = gen_reg_rtx (iter_mode);
20119 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
20120 NULL, 1, OPTAB_DIRECT);
20121 /* Those two should combine. */
20122 if (piece_size == const1_rtx)
20124 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
20125 true, out_label);
20126 predict_jump (REG_BR_PROB_BASE * 10 / 100);
20128 emit_move_insn (iter, const0_rtx);
20130 emit_label (top_label);
20132 tmp = convert_modes (Pmode, iter_mode, iter, true);
20133 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
20134 destmem = change_address (destmem, mode, x_addr);
20136 if (srcmem)
20138 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
20139 srcmem = change_address (srcmem, mode, y_addr);
20141 /* When unrolling for chips that reorder memory reads and writes,
20142 we can save registers by using a single temporary.
20143 Also, using 4 temporaries is overkill in 32-bit mode. */
20144 if (!TARGET_64BIT && 0)
20146 for (i = 0; i < unroll; i++)
20148 if (i)
20150 destmem =
20151 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20152 srcmem =
20153 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20155 emit_move_insn (destmem, srcmem);
20158 else
20160 rtx tmpreg[4];
20161 gcc_assert (unroll <= 4);
20162 for (i = 0; i < unroll; i++)
20164 tmpreg[i] = gen_reg_rtx (mode);
20165 if (i)
20167 srcmem =
20168 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20170 emit_move_insn (tmpreg[i], srcmem);
20172 for (i = 0; i < unroll; i++)
20174 if (i)
20176 destmem =
20177 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20179 emit_move_insn (destmem, tmpreg[i]);
20183 else
20184 for (i = 0; i < unroll; i++)
20186 if (i)
20187 destmem =
20188 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20189 emit_move_insn (destmem, value);
20192 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
20193 true, OPTAB_LIB_WIDEN);
20194 if (tmp != iter)
20195 emit_move_insn (iter, tmp);
20197 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
20198 true, top_label);
20199 if (expected_size != -1)
20201 expected_size /= GET_MODE_SIZE (mode) * unroll;
20202 if (expected_size == 0)
20203 predict_jump (0);
20204 else if (expected_size > REG_BR_PROB_BASE)
20205 predict_jump (REG_BR_PROB_BASE - 1);
20206 else
20207 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
20209 else
20210 predict_jump (REG_BR_PROB_BASE * 80 / 100);
20211 iter = ix86_zero_extend_to_Pmode (iter);
20212 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
20213 true, OPTAB_LIB_WIDEN);
20214 if (tmp != destptr)
20215 emit_move_insn (destptr, tmp);
20216 if (srcptr)
20218 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
20219 true, OPTAB_LIB_WIDEN);
20220 if (tmp != srcptr)
20221 emit_move_insn (srcptr, tmp);
20223 emit_label (out_label);
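/* Roughly, the code emitted above has this shape (pseudo-C sketch, for
   the copy case with no unrolling; any epilogue handling is the
   caller's job):

       size = count & ~(piece_size - 1);
       if (size == 0) goto out;     // emitted only when piece_size == 1
       iter = 0;
     top:
       *(MODE *)(dest + iter) = *(MODE *)(src + iter);
       iter += piece_size;
       if (iter < size) goto top;
       dest += iter;  src += iter;
     out: ;
*/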
20226 /* Output a "rep; mov" instruction.
20227 Arguments have the same meaning as for the previous function. */
20228 static void
20229 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
20230 rtx destptr, rtx srcptr,
20231 rtx count,
20232 enum machine_mode mode)
20234 rtx destexp;
20235 rtx srcexp;
20236 rtx countreg;
20238 /* If the size is known, it is shorter to use rep movs. */
20239 if (mode == QImode && CONST_INT_P (count)
20240 && !(INTVAL (count) & 3))
20241 mode = SImode;
20243 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20244 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20245 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
20246 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
20247 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20248 if (mode != QImode)
20250 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20251 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20252 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20253 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
20254 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20255 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
20257 else
20259 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20260 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
20262 if (CONST_INT_P (count))
20264 count = GEN_INT (INTVAL (count)
20265 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20266 destmem = shallow_copy_rtx (destmem);
20267 srcmem = shallow_copy_rtx (srcmem);
20268 set_mem_size (destmem, count);
20269 set_mem_size (srcmem, count);
20271 else
20273 if (MEM_SIZE (destmem))
20274 set_mem_size (destmem, NULL_RTX);
20275 if (MEM_SIZE (srcmem))
20276 set_mem_size (srcmem, NULL_RTX);
20278 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
20279 destexp, srcexp));
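/* For example, with MODE == SImode this boils down to (illustrative
   operand assignment; the real patterns keep everything in RTL):

       movl  count, %ecx
       shrl  $2, %ecx            # countreg = count / 4
       rep movsl                 # copy %ecx longs from (%esi) to (%edi)

   DESTEXP and SRCEXP describe the final pointer values,
   destptr + (countreg << 2) and srcptr + (countreg << 2), so the
   rep_mov pattern can express the autoincrement side effects.  */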
20282 /* Output a "rep; stos" instruction.
20283 Arguments have the same meaning as for the previous function. */
20284 static void
20285 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
20286 rtx count, enum machine_mode mode,
20287 rtx orig_value)
20289 rtx destexp;
20290 rtx countreg;
20292 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20293 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20294 value = force_reg (mode, gen_lowpart (mode, value));
20295 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20296 if (mode != QImode)
20298 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20299 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20300 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20302 else
20303 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20304 if (orig_value == const0_rtx && CONST_INT_P (count))
20306 count = GEN_INT (INTVAL (count)
20307 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20308 destmem = shallow_copy_rtx (destmem);
20309 set_mem_size (destmem, count);
20311 else if (MEM_SIZE (destmem))
20312 set_mem_size (destmem, NULL_RTX);
20313 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
20316 static void
20317 emit_strmov (rtx destmem, rtx srcmem,
20318 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
20320 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
20321 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
20322 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20325 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
20326 static void
20327 expand_movmem_epilogue (rtx destmem, rtx srcmem,
20328 rtx destptr, rtx srcptr, rtx count, int max_size)
20330 rtx src, dest;
20331 if (CONST_INT_P (count))
20333 HOST_WIDE_INT countval = INTVAL (count);
20334 int offset = 0;
20336 if ((countval & 0x10) && max_size > 16)
20338 if (TARGET_64BIT)
20340 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20341 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
20343 else
20344 gcc_unreachable ();
20345 offset += 16;
20347 if ((countval & 0x08) && max_size > 8)
20349 if (TARGET_64BIT)
20350 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20351 else
20353 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20354 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
20356 offset += 8;
20358 if ((countval & 0x04) && max_size > 4)
20360 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20361 offset += 4;
20363 if ((countval & 0x02) && max_size > 2)
20365 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
20366 offset += 2;
20368 if ((countval & 0x01) && max_size > 1)
20370 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
20371 offset += 1;
20373 return;
20375 if (max_size > 8)
20377 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
20378 count, 1, OPTAB_DIRECT);
20379 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
20380 count, QImode, 1, 4);
20381 return;
20384 /* When single stringop insns are available, we can cheaply increase the
20385 dest and src pointers. Otherwise we save code size by maintaining an offset
20386 (zero is readily available from the preceding rep operation) and using x86 addressing modes. */
20388 if (TARGET_SINGLE_STRINGOP)
20390 if (max_size > 4)
20392 rtx label = ix86_expand_aligntest (count, 4, true);
20393 src = change_address (srcmem, SImode, srcptr);
20394 dest = change_address (destmem, SImode, destptr);
20395 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20396 emit_label (label);
20397 LABEL_NUSES (label) = 1;
20399 if (max_size > 2)
20401 rtx label = ix86_expand_aligntest (count, 2, true);
20402 src = change_address (srcmem, HImode, srcptr);
20403 dest = change_address (destmem, HImode, destptr);
20404 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20405 emit_label (label);
20406 LABEL_NUSES (label) = 1;
20408 if (max_size > 1)
20410 rtx label = ix86_expand_aligntest (count, 1, true);
20411 src = change_address (srcmem, QImode, srcptr);
20412 dest = change_address (destmem, QImode, destptr);
20413 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20414 emit_label (label);
20415 LABEL_NUSES (label) = 1;
20418 else
20420 rtx offset = force_reg (Pmode, const0_rtx);
20421 rtx tmp;
20423 if (max_size > 4)
20425 rtx label = ix86_expand_aligntest (count, 4, true);
20426 src = change_address (srcmem, SImode, srcptr);
20427 dest = change_address (destmem, SImode, destptr);
20428 emit_move_insn (dest, src);
20429 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
20430 true, OPTAB_LIB_WIDEN);
20431 if (tmp != offset)
20432 emit_move_insn (offset, tmp);
20433 emit_label (label);
20434 LABEL_NUSES (label) = 1;
20436 if (max_size > 2)
20438 rtx label = ix86_expand_aligntest (count, 2, true);
20439 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20440 src = change_address (srcmem, HImode, tmp);
20441 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20442 dest = change_address (destmem, HImode, tmp);
20443 emit_move_insn (dest, src);
20444 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
20445 true, OPTAB_LIB_WIDEN);
20446 if (tmp != offset)
20447 emit_move_insn (offset, tmp);
20448 emit_label (label);
20449 LABEL_NUSES (label) = 1;
20451 if (max_size > 1)
20453 rtx label = ix86_expand_aligntest (count, 1, true);
20454 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20455 src = change_address (srcmem, QImode, tmp);
20456 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20457 dest = change_address (destmem, QImode, tmp);
20458 emit_move_insn (dest, src);
20459 emit_label (label);
20460 LABEL_NUSES (label) = 1;
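/* A worked example for the constant-count branch above: on a 64-bit
   target with countval == 29 (binary 11101) and max_size == 32, the
   epilogue emits two DImode moves for the 0x10 chunk, one DImode move
   for 0x08, one SImode move for 0x04 and one QImode move for 0x01,
   i.e. 16 + 8 + 4 + 1 = 29 bytes, each at the running OFFSET.  */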
20465 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20466 static void
20467 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
20468 rtx count, int max_size)
20470 count =
20471 expand_simple_binop (counter_mode (count), AND, count,
20472 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
20473 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
20474 gen_lowpart (QImode, value), count, QImode,
20475 1, max_size / 2);
20478 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20479 static void
20480 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
20482 rtx dest;
20484 if (CONST_INT_P (count))
20486 HOST_WIDE_INT countval = INTVAL (count);
20487 int offset = 0;
20489 if ((countval & 0x10) && max_size > 16)
20491 if (TARGET_64BIT)
20493 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20494 emit_insn (gen_strset (destptr, dest, value));
20495 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
20496 emit_insn (gen_strset (destptr, dest, value));
20498 else
20499 gcc_unreachable ();
20500 offset += 16;
20502 if ((countval & 0x08) && max_size > 8)
20504 if (TARGET_64BIT)
20506 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20507 emit_insn (gen_strset (destptr, dest, value));
20509 else
20511 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20512 emit_insn (gen_strset (destptr, dest, value));
20513 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
20514 emit_insn (gen_strset (destptr, dest, value));
20516 offset += 8;
20518 if ((countval & 0x04) && max_size > 4)
20520 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20521 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20522 offset += 4;
20524 if ((countval & 0x02) && max_size > 2)
20526 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20527 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20528 offset += 2;
20530 if ((countval & 0x01) && max_size > 1)
20532 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20533 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20534 offset += 1;
20536 return;
20538 if (max_size > 32)
20540 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20541 return;
20543 if (max_size > 16)
20545 rtx label = ix86_expand_aligntest (count, 16, true);
20546 if (TARGET_64BIT)
20548 dest = change_address (destmem, DImode, destptr);
20549 emit_insn (gen_strset (destptr, dest, value));
20550 emit_insn (gen_strset (destptr, dest, value));
20552 else
20554 dest = change_address (destmem, SImode, destptr);
20555 emit_insn (gen_strset (destptr, dest, value));
20556 emit_insn (gen_strset (destptr, dest, value));
20557 emit_insn (gen_strset (destptr, dest, value));
20558 emit_insn (gen_strset (destptr, dest, value));
20560 emit_label (label);
20561 LABEL_NUSES (label) = 1;
20563 if (max_size > 8)
20565 rtx label = ix86_expand_aligntest (count, 8, true);
20566 if (TARGET_64BIT)
20568 dest = change_address (destmem, DImode, destptr);
20569 emit_insn (gen_strset (destptr, dest, value));
20571 else
20573 dest = change_address (destmem, SImode, destptr);
20574 emit_insn (gen_strset (destptr, dest, value));
20575 emit_insn (gen_strset (destptr, dest, value));
20577 emit_label (label);
20578 LABEL_NUSES (label) = 1;
20580 if (max_size > 4)
20582 rtx label = ix86_expand_aligntest (count, 4, true);
20583 dest = change_address (destmem, SImode, destptr);
20584 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20585 emit_label (label);
20586 LABEL_NUSES (label) = 1;
20588 if (max_size > 2)
20590 rtx label = ix86_expand_aligntest (count, 2, true);
20591 dest = change_address (destmem, HImode, destptr);
20592 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20593 emit_label (label);
20594 LABEL_NUSES (label) = 1;
20596 if (max_size > 1)
20598 rtx label = ix86_expand_aligntest (count, 1, true);
20599 dest = change_address (destmem, QImode, destptr);
20600 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20601 emit_label (label);
20602 LABEL_NUSES (label) = 1;
20606 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
20607 by ALIGN, to DESIRED_ALIGNMENT. */
20608 static void
20609 expand_movmem_prologue (rtx destmem, rtx srcmem,
20610 rtx destptr, rtx srcptr, rtx count,
20611 int align, int desired_alignment)
20613 if (align <= 1 && desired_alignment > 1)
20615 rtx label = ix86_expand_aligntest (destptr, 1, false);
20616 srcmem = change_address (srcmem, QImode, srcptr);
20617 destmem = change_address (destmem, QImode, destptr);
20618 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20619 ix86_adjust_counter (count, 1);
20620 emit_label (label);
20621 LABEL_NUSES (label) = 1;
20623 if (align <= 2 && desired_alignment > 2)
20625 rtx label = ix86_expand_aligntest (destptr, 2, false);
20626 srcmem = change_address (srcmem, HImode, srcptr);
20627 destmem = change_address (destmem, HImode, destptr);
20628 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20629 ix86_adjust_counter (count, 2);
20630 emit_label (label);
20631 LABEL_NUSES (label) = 1;
20633 if (align <= 4 && desired_alignment > 4)
20635 rtx label = ix86_expand_aligntest (destptr, 4, false);
20636 srcmem = change_address (srcmem, SImode, srcptr);
20637 destmem = change_address (destmem, SImode, destptr);
20638 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20639 ix86_adjust_counter (count, 4);
20640 emit_label (label);
20641 LABEL_NUSES (label) = 1;
20643 gcc_assert (desired_alignment <= 8);
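/* Worked example for the prologue above: with ALIGN == 1 and
   DESIRED_ALIGNMENT == 8, up to three conditional copies are emitted
   (1, 2 and 4 bytes), each guarded by a test of the corresponding low
   bit of the destination address, so at most 1 + 2 + 4 = 7 bytes are
   moved and COUNT is decreased accordingly before the main loop runs.  */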
20646 /* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
20647 ALIGN_BYTES is how many bytes need to be copied. */
20648 static rtx
20649 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20650 int desired_align, int align_bytes)
20652 rtx src = *srcp;
20653 rtx src_size, dst_size;
20654 int off = 0;
20655 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20656 if (src_align_bytes >= 0)
20657 src_align_bytes = desired_align - src_align_bytes;
20658 src_size = MEM_SIZE (src);
20659 dst_size = MEM_SIZE (dst);
20660 if (align_bytes & 1)
20662 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20663 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20664 off = 1;
20665 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20667 if (align_bytes & 2)
20669 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20670 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20671 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20672 set_mem_align (dst, 2 * BITS_PER_UNIT);
20673 if (src_align_bytes >= 0
20674 && (src_align_bytes & 1) == (align_bytes & 1)
20675 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20676 set_mem_align (src, 2 * BITS_PER_UNIT);
20677 off = 2;
20678 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20680 if (align_bytes & 4)
20682 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20683 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20684 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20685 set_mem_align (dst, 4 * BITS_PER_UNIT);
20686 if (src_align_bytes >= 0)
20688 unsigned int src_align = 0;
20689 if ((src_align_bytes & 3) == (align_bytes & 3))
20690 src_align = 4;
20691 else if ((src_align_bytes & 1) == (align_bytes & 1))
20692 src_align = 2;
20693 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20694 set_mem_align (src, src_align * BITS_PER_UNIT);
20696 off = 4;
20697 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20699 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20700 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20701 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20702 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20703 if (src_align_bytes >= 0)
20705 unsigned int src_align = 0;
20706 if ((src_align_bytes & 7) == (align_bytes & 7))
20707 src_align = 8;
20708 else if ((src_align_bytes & 3) == (align_bytes & 3))
20709 src_align = 4;
20710 else if ((src_align_bytes & 1) == (align_bytes & 1))
20711 src_align = 2;
20712 if (src_align > (unsigned int) desired_align)
20713 src_align = desired_align;
20714 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20715 set_mem_align (src, src_align * BITS_PER_UNIT);
20717 if (dst_size)
20718 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20719 if (src_size)
20720 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
20721 *srcp = src;
20722 return dst;
20725 /* Store enough bytes at DEST to align DEST, known to be aligned
20726 by ALIGN, to DESIRED_ALIGNMENT. */
20727 static void
20728 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20729 int align, int desired_alignment)
20731 if (align <= 1 && desired_alignment > 1)
20733 rtx label = ix86_expand_aligntest (destptr, 1, false);
20734 destmem = change_address (destmem, QImode, destptr);
20735 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20736 ix86_adjust_counter (count, 1);
20737 emit_label (label);
20738 LABEL_NUSES (label) = 1;
20740 if (align <= 2 && desired_alignment > 2)
20742 rtx label = ix86_expand_aligntest (destptr, 2, false);
20743 destmem = change_address (destmem, HImode, destptr);
20744 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20745 ix86_adjust_counter (count, 2);
20746 emit_label (label);
20747 LABEL_NUSES (label) = 1;
20749 if (align <= 4 && desired_alignment > 4)
20751 rtx label = ix86_expand_aligntest (destptr, 4, false);
20752 destmem = change_address (destmem, SImode, destptr);
20753 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20754 ix86_adjust_counter (count, 4);
20755 emit_label (label);
20756 LABEL_NUSES (label) = 1;
20758 gcc_assert (desired_alignment <= 8);
20761 /* Store enough bytes at DST to align DST, known to be aligned by ALIGN,
20762 to DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
20763 static rtx
20764 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20765 int desired_align, int align_bytes)
20767 int off = 0;
20768 rtx dst_size = MEM_SIZE (dst);
20769 if (align_bytes & 1)
20771 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20772 off = 1;
20773 emit_insn (gen_strset (destreg, dst,
20774 gen_lowpart (QImode, value)));
20776 if (align_bytes & 2)
20778 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20779 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20780 set_mem_align (dst, 2 * BITS_PER_UNIT);
20781 off = 2;
20782 emit_insn (gen_strset (destreg, dst,
20783 gen_lowpart (HImode, value)));
20785 if (align_bytes & 4)
20787 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20788 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20789 set_mem_align (dst, 4 * BITS_PER_UNIT);
20790 off = 4;
20791 emit_insn (gen_strset (destreg, dst,
20792 gen_lowpart (SImode, value)));
20794 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20795 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20796 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20797 if (dst_size)
20798 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20799 return dst;
20802 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20803 static enum stringop_alg
20804 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20805 int *dynamic_check)
20807 const struct stringop_algs * algs;
20808 bool optimize_for_speed;
20809 /* Algorithms using the rep prefix want at least edi and ecx;
20810 additionally, memset wants eax and memcpy wants esi. Don't
20811 consider such algorithms if the user has appropriated those
20812 registers for their own purposes. */
20813 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20814 || (memset
20815 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20817 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20818 || (alg != rep_prefix_1_byte \
20819 && alg != rep_prefix_4_byte \
20820 && alg != rep_prefix_8_byte))
20821 const struct processor_costs *cost;
20823 /* Even if the string operation call is cold, we still might spend a lot
20824 of time processing large blocks. */
20825 if (optimize_function_for_size_p (cfun)
20826 || (optimize_insn_for_size_p ()
20827 && expected_size != -1 && expected_size < 256))
20828 optimize_for_speed = false;
20829 else
20830 optimize_for_speed = true;
20832 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20834 *dynamic_check = -1;
20835 if (memset)
20836 algs = &cost->memset[TARGET_64BIT != 0];
20837 else
20838 algs = &cost->memcpy[TARGET_64BIT != 0];
20839 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
20840 return ix86_stringop_alg;
20841 /* rep; movq or rep; movl is the smallest variant. */
20842 else if (!optimize_for_speed)
20844 if (!count || (count & 3))
20845 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20846 else
20847 return rep_prefix_usable ? rep_prefix_4_byte : loop;
20849 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
20851 else if (expected_size != -1 && expected_size < 4)
20852 return loop_1_byte;
20853 else if (expected_size != -1)
20855 unsigned int i;
20856 enum stringop_alg alg = libcall;
20857 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20859 /* We get here if the algorithms that were not libcall-based
20860 were rep-prefix based and we are unable to use rep prefixes
20861 based on global register usage. Break out of the loop and
20862 use the heuristic below. */
20863 if (algs->size[i].max == 0)
20864 break;
20865 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20867 enum stringop_alg candidate = algs->size[i].alg;
20869 if (candidate != libcall && ALG_USABLE_P (candidate))
20870 alg = candidate;
20871 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20872 last non-libcall inline algorithm. */
20873 if (TARGET_INLINE_ALL_STRINGOPS)
20875 /* When the current size is best copied by a libcall, but we
20876 are still forced to inline, run the heuristic below that
20877 will pick code for medium-sized blocks. */
20878 if (alg != libcall)
20879 return alg;
20880 break;
20882 else if (ALG_USABLE_P (candidate))
20883 return candidate;
20886 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
20888 /* When asked to inline the call anyway, try to pick a meaningful choice.
20889 We look for the maximal size of block that is faster to copy by hand,
20890 and take blocks of at most that size, guessing that the average size
20891 will be roughly half of that maximum.
20893 If this turns out to be bad, we might simply specify the preferred
20894 choice in ix86_costs. */
20895 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20896 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20898 int max = -1;
20899 enum stringop_alg alg;
20900 int i;
20901 bool any_alg_usable_p = true;
20903 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20905 enum stringop_alg candidate = algs->size[i].alg;
20906 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20908 if (candidate != libcall && candidate
20909 && ALG_USABLE_P (candidate))
20910 max = algs->size[i].max;
20912 /* If there aren't any usable algorithms, then recursing on
20913 smaller sizes isn't going to find anything. Just return the
20914 simple byte-at-a-time copy loop. */
20915 if (!any_alg_usable_p)
20917 /* Pick something reasonable. */
20918 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20919 *dynamic_check = 128;
20920 return loop_1_byte;
20922 if (max == -1)
20923 max = 4096;
20924 alg = decide_alg (count, max / 2, memset, dynamic_check);
20925 gcc_assert (*dynamic_check == -1);
20926 gcc_assert (alg != libcall);
20927 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20928 *dynamic_check = max;
20929 return alg;
20931 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20932 #undef ALG_USABLE_P
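/* A hypothetical illustration of the table walk above: with
   size/algorithm pairs such as {24, loop}, {128, rep_prefix_4_byte},
   {-1, libcall}, an expected size of 100 bytes selects
   rep_prefix_4_byte (the first entry whose max covers the size), a
   16-byte copy selects loop, and an unknown size falls through to
   unknown_size or to the recursive heuristic at the end of the
   function.  The numbers here are made up; the real entries live in
   the processor cost tables (e.g. ix86_cost->memcpy).  */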
20935 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20936 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20937 static int
20938 decide_alignment (int align,
20939 enum stringop_alg alg,
20940 int expected_size)
20942 int desired_align = 0;
20943 switch (alg)
20945 case no_stringop:
20946 gcc_unreachable ();
20947 case loop:
20948 case unrolled_loop:
20949 desired_align = GET_MODE_SIZE (Pmode);
20950 break;
20951 case rep_prefix_8_byte:
20952 desired_align = 8;
20953 break;
20954 case rep_prefix_4_byte:
20955 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
20956 copying a whole cacheline at once. */
20957 if (TARGET_PENTIUMPRO)
20958 desired_align = 8;
20959 else
20960 desired_align = 4;
20961 break;
20962 case rep_prefix_1_byte:
20963 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
20964 copying a whole cacheline at once. */
20965 if (TARGET_PENTIUMPRO)
20966 desired_align = 8;
20967 else
20968 desired_align = 1;
20969 break;
20970 case loop_1_byte:
20971 desired_align = 1;
20972 break;
20973 case libcall:
20974 return 0;
20977 if (optimize_size)
20978 desired_align = 1;
20979 if (desired_align < align)
20980 desired_align = align;
20981 if (expected_size != -1 && expected_size < 4)
20982 desired_align = align;
20983 return desired_align;
20986 /* Return the smallest power of 2 greater than VAL. */
20987 static int
20988 smallest_pow2_greater_than (int val)
20990 int ret = 1;
20991 while (ret <= val)
20992 ret <<= 1;
20993 return ret;
20996 /* Expand string move (memcpy) operation. Use i386 string operations
20997 when profitable. expand_setmem contains similar code. The code
20998 depends upon architecture, block size and alignment, but always has
20999 the same overall structure:
21001 1) Prologue guard: a conditional that jumps to the epilogue for small
21002 blocks that can be handled by the epilogue alone. This is faster
21003 but also needed for correctness, since the prologue assumes the block
21004 is larger than the desired alignment.
21006 Optional dynamic check for size and libcall for large
21007 blocks is emitted here too, with -minline-stringops-dynamically.
21009 2) Prologue: copy first few bytes in order to get destination
21010 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
21011 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
21012 copied. We emit either a jump tree on power of two sized
21013 blocks, or a byte loop.
21015 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
21016 with specified algorithm.
21018 4) Epilogue: code copying tail of the block that is too small to be
21019 handled by main body (or up to size guarded by prologue guard). */
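/* A compressed sketch of the code laid out by the four steps above
   (pseudo-C; the real expansion is all RTL and many pieces are emitted
   only conditionally):

       if (count < epilogue_size_needed) goto epilogue;          // step 1
       if (dest & 1) copy 1 byte;                                // step 2
       if (dest & 2) copy 2 bytes;  if (dest & 4) copy 4 bytes;
       main copy loop or rep-prefixed insn on SIZE_NEEDED chunks;// step 3
     epilogue:
       copy remaining count & (epilogue_size_needed - 1) bytes;  // step 4
*/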
21021 bool
21022 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
21023 rtx expected_align_exp, rtx expected_size_exp)
21025 rtx destreg;
21026 rtx srcreg;
21027 rtx label = NULL;
21028 rtx tmp;
21029 rtx jump_around_label = NULL;
21030 HOST_WIDE_INT align = 1;
21031 unsigned HOST_WIDE_INT count = 0;
21032 HOST_WIDE_INT expected_size = -1;
21033 int size_needed = 0, epilogue_size_needed;
21034 int desired_align = 0, align_bytes = 0;
21035 enum stringop_alg alg;
21036 int dynamic_check;
21037 bool need_zero_guard = false;
21039 if (CONST_INT_P (align_exp))
21040 align = INTVAL (align_exp);
21041 /* i386 can do misaligned accesses at a reasonably increased cost. */
21042 if (CONST_INT_P (expected_align_exp)
21043 && INTVAL (expected_align_exp) > align)
21044 align = INTVAL (expected_align_exp);
21045 /* ALIGN is the minimum of destination and source alignment, but we care here
21046 just about destination alignment. */
21047 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
21048 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
21050 if (CONST_INT_P (count_exp))
21051 count = expected_size = INTVAL (count_exp);
21052 if (CONST_INT_P (expected_size_exp) && count == 0)
21053 expected_size = INTVAL (expected_size_exp);
21055 /* Make sure we don't need to care about overflow later on. */
21056 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21057 return false;
21059 /* Step 0: Decide on preferred algorithm, desired alignment and
21060 size of chunks to be copied by main loop. */
21062 alg = decide_alg (count, expected_size, false, &dynamic_check);
21063 desired_align = decide_alignment (align, alg, expected_size);
21065 if (!TARGET_ALIGN_STRINGOPS)
21066 align = desired_align;
21068 if (alg == libcall)
21069 return false;
21070 gcc_assert (alg != no_stringop);
21071 if (!count)
21072 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
21073 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21074 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
21075 switch (alg)
21077 case libcall:
21078 case no_stringop:
21079 gcc_unreachable ();
21080 case loop:
21081 need_zero_guard = true;
21082 size_needed = GET_MODE_SIZE (Pmode);
21083 break;
21084 case unrolled_loop:
21085 need_zero_guard = true;
21086 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
21087 break;
21088 case rep_prefix_8_byte:
21089 size_needed = 8;
21090 break;
21091 case rep_prefix_4_byte:
21092 size_needed = 4;
21093 break;
21094 case rep_prefix_1_byte:
21095 size_needed = 1;
21096 break;
21097 case loop_1_byte:
21098 need_zero_guard = true;
21099 size_needed = 1;
21100 break;
21103 epilogue_size_needed = size_needed;
21105 /* Step 1: Prologue guard. */
21107 /* Alignment code needs the count to be in a register. */
21108 if (CONST_INT_P (count_exp) && desired_align > align)
21110 if (INTVAL (count_exp) > desired_align
21111 && INTVAL (count_exp) > size_needed)
21113 align_bytes
21114 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21115 if (align_bytes <= 0)
21116 align_bytes = 0;
21117 else
21118 align_bytes = desired_align - align_bytes;
21120 if (align_bytes == 0)
21121 count_exp = force_reg (counter_mode (count_exp), count_exp);
21123 gcc_assert (desired_align >= 1 && align >= 1);
21125 /* Ensure that alignment prologue won't copy past end of block. */
21126 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21128 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21129 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21130 Make sure it is a power of 2. */
21131 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21133 if (count)
21135 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21137 /* If main algorithm works on QImode, no epilogue is needed.
21138 For small sizes just don't align anything. */
21139 if (size_needed == 1)
21140 desired_align = align;
21141 else
21142 goto epilogue;
21145 else
21147 label = gen_label_rtx ();
21148 emit_cmp_and_jump_insns (count_exp,
21149 GEN_INT (epilogue_size_needed),
21150 LTU, 0, counter_mode (count_exp), 1, label);
21151 if (expected_size == -1 || expected_size < epilogue_size_needed)
21152 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21153 else
21154 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21158 /* Emit code to decide at runtime whether a library call or inline code
21159 should be used. */
21160 if (dynamic_check != -1)
21162 if (CONST_INT_P (count_exp))
21164 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
21166 emit_block_move_via_libcall (dst, src, count_exp, false);
21167 count_exp = const0_rtx;
21168 goto epilogue;
21171 else
21173 rtx hot_label = gen_label_rtx ();
21174 jump_around_label = gen_label_rtx ();
21175 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21176 LEU, 0, GET_MODE (count_exp), 1, hot_label);
21177 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21178 emit_block_move_via_libcall (dst, src, count_exp, false);
21179 emit_jump (jump_around_label);
21180 emit_label (hot_label);
21184 /* Step 2: Alignment prologue. */
21186 if (desired_align > align)
21188 if (align_bytes == 0)
21190 /* Except for the first move in the epilogue, we no longer know
21191 the constant offset in the aliasing info. It doesn't seem worth
21192 the pain to maintain it for the first move, so throw away
21193 the info early. */
21194 src = change_address (src, BLKmode, srcreg);
21195 dst = change_address (dst, BLKmode, destreg);
21196 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
21197 desired_align);
21199 else
21201 /* If we know how many bytes need to be stored before dst is
21202 sufficiently aligned, maintain aliasing info accurately. */
21203 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
21204 desired_align, align_bytes);
21205 count_exp = plus_constant (count_exp, -align_bytes);
21206 count -= align_bytes;
21208 if (need_zero_guard
21209 && (count < (unsigned HOST_WIDE_INT) size_needed
21210 || (align_bytes == 0
21211 && count < ((unsigned HOST_WIDE_INT) size_needed
21212 + desired_align - align))))
21214 /* It is possible that we copied enough so the main loop will not
21215 execute. */
21216 gcc_assert (size_needed > 1);
21217 if (label == NULL_RTX)
21218 label = gen_label_rtx ();
21219 emit_cmp_and_jump_insns (count_exp,
21220 GEN_INT (size_needed),
21221 LTU, 0, counter_mode (count_exp), 1, label);
21222 if (expected_size == -1
21223 || expected_size < (desired_align - align) / 2 + size_needed)
21224 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21225 else
21226 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21229 if (label && size_needed == 1)
21231 emit_label (label);
21232 LABEL_NUSES (label) = 1;
21233 label = NULL;
21234 epilogue_size_needed = 1;
21236 else if (label == NULL_RTX)
21237 epilogue_size_needed = size_needed;
21239 /* Step 3: Main loop. */
21241 switch (alg)
21243 case libcall:
21244 case no_stringop:
21245 gcc_unreachable ();
21246 case loop_1_byte:
21247 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21248 count_exp, QImode, 1, expected_size);
21249 break;
21250 case loop:
21251 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21252 count_exp, Pmode, 1, expected_size);
21253 break;
21254 case unrolled_loop:
21255 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
21256 registers for 4 temporaries anyway. */
21257 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21258 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
21259 expected_size);
21260 break;
21261 case rep_prefix_8_byte:
21262 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21263 DImode);
21264 break;
21265 case rep_prefix_4_byte:
21266 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21267 SImode);
21268 break;
21269 case rep_prefix_1_byte:
21270 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21271 QImode);
21272 break;
21274 /* Adjust properly the offset of src and dest memory for aliasing. */
21275 if (CONST_INT_P (count_exp))
21277 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
21278 (count / size_needed) * size_needed);
21279 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21280 (count / size_needed) * size_needed);
21282 else
21284 src = change_address (src, BLKmode, srcreg);
21285 dst = change_address (dst, BLKmode, destreg);
21288 /* Step 4: Epilogue to copy the remaining bytes. */
21289 epilogue:
21290 if (label)
21292 /* When the main loop is done, COUNT_EXP might hold the original count,
21293 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21294 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21295 bytes. Compensate if needed. */
21297 if (size_needed < epilogue_size_needed)
21299 tmp =
21300 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21301 GEN_INT (size_needed - 1), count_exp, 1,
21302 OPTAB_DIRECT);
21303 if (tmp != count_exp)
21304 emit_move_insn (count_exp, tmp);
21306 emit_label (label);
21307 LABEL_NUSES (label) = 1;
21310 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21311 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
21312 epilogue_size_needed);
21313 if (jump_around_label)
21314 emit_label (jump_around_label);
21315 return true;
21318 /* Helper function for memset.  For the QImode value 0xXY produce
21319 0xXYXYXYXY of the width specified by MODE.  This is essentially
21320 a * 0x01010101, but we can do slightly better than
21321 synth_mult by unwinding the sequence by hand on CPUs with
21322 slow multiply. */
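/* For example, with VAL == 0x5a and MODE == SImode the constant path
   below computes 0x5a -> 0x5a5a -> 0x5a5a5a5a directly, while the
   register path multiplies the zero-extended byte by 0x01010101 or
   builds the same value with the insv/shift/IOR sequence, whichever
   the cost comparison prefers.  */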
21323 static rtx
21324 promote_duplicated_reg (enum machine_mode mode, rtx val)
21326 enum machine_mode valmode = GET_MODE (val);
21327 rtx tmp;
21328 int nops = mode == DImode ? 3 : 2;
21330 gcc_assert (mode == SImode || mode == DImode);
21331 if (val == const0_rtx)
21332 return copy_to_mode_reg (mode, const0_rtx);
21333 if (CONST_INT_P (val))
21335 HOST_WIDE_INT v = INTVAL (val) & 255;
21337 v |= v << 8;
21338 v |= v << 16;
21339 if (mode == DImode)
21340 v |= (v << 16) << 16;
21341 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
21344 if (valmode == VOIDmode)
21345 valmode = QImode;
21346 if (valmode != QImode)
21347 val = gen_lowpart (QImode, val);
21348 if (mode == QImode)
21349 return val;
21350 if (!TARGET_PARTIAL_REG_STALL)
21351 nops--;
21352 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
21353 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
21354 <= (ix86_cost->shift_const + ix86_cost->add) * nops
21355 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
21357 rtx reg = convert_modes (mode, QImode, val, true);
21358 tmp = promote_duplicated_reg (mode, const1_rtx);
21359 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
21360 OPTAB_DIRECT);
21362 else
21364 rtx reg = convert_modes (mode, QImode, val, true);
21366 if (!TARGET_PARTIAL_REG_STALL)
21367 if (mode == SImode)
21368 emit_insn (gen_movsi_insv_1 (reg, reg));
21369 else
21370 emit_insn (gen_movdi_insv_1 (reg, reg));
21371 else
21373 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
21374 NULL, 1, OPTAB_DIRECT);
21375 reg =
21376 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21378 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
21379 NULL, 1, OPTAB_DIRECT);
21380 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21381 if (mode == SImode)
21382 return reg;
21383 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
21384 NULL, 1, OPTAB_DIRECT);
21385 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21386 return reg;
21390 /* Duplicate the value VAL using promote_duplicated_reg into the maximal size
21391 that will be needed by the main loop copying SIZE_NEEDED chunks and by the
21392 prologue raising the alignment from ALIGN to DESIRED_ALIGN. */
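/* For instance, a 64-bit main loop working on 8-byte chunks gets a
   DImode duplicate of the byte, a 4-byte chunk size gets an SImode
   duplicate, and a byte loop with no alignment prologue can use VAL
   unchanged.  */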
21393 static rtx
21394 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
21396 rtx promoted_val;
21398 if (TARGET_64BIT
21399 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
21400 promoted_val = promote_duplicated_reg (DImode, val);
21401 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
21402 promoted_val = promote_duplicated_reg (SImode, val);
21403 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
21404 promoted_val = promote_duplicated_reg (HImode, val);
21405 else
21406 promoted_val = val;
21408 return promoted_val;
21411 /* Expand a string set operation (memset).  Use i386 string operations when
21412 profitable.  See the expand_movmem comment for an explanation of the
21413 individual steps performed. */
21414 bool
21415 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
21416 rtx expected_align_exp, rtx expected_size_exp)
21418 rtx destreg;
21419 rtx label = NULL;
21420 rtx tmp;
21421 rtx jump_around_label = NULL;
21422 HOST_WIDE_INT align = 1;
21423 unsigned HOST_WIDE_INT count = 0;
21424 HOST_WIDE_INT expected_size = -1;
21425 int size_needed = 0, epilogue_size_needed;
21426 int desired_align = 0, align_bytes = 0;
21427 enum stringop_alg alg;
21428 rtx promoted_val = NULL;
21429 bool force_loopy_epilogue = false;
21430 int dynamic_check;
21431 bool need_zero_guard = false;
21433 if (CONST_INT_P (align_exp))
21434 align = INTVAL (align_exp);
21435 /* i386 can do misaligned access at a reasonable increase in cost. */
21436 if (CONST_INT_P (expected_align_exp)
21437 && INTVAL (expected_align_exp) > align)
21438 align = INTVAL (expected_align_exp);
21439 if (CONST_INT_P (count_exp))
21440 count = expected_size = INTVAL (count_exp);
21441 if (CONST_INT_P (expected_size_exp) && count == 0)
21442 expected_size = INTVAL (expected_size_exp);
21444 /* Make sure we don't need to care about overflow later on. */
21445 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21446 return false;
21448 /* Step 0: Decide on preferred algorithm, desired alignment and
21449 size of chunks to be copied by main loop. */
21451 alg = decide_alg (count, expected_size, true, &dynamic_check);
21452 desired_align = decide_alignment (align, alg, expected_size);
21454 if (!TARGET_ALIGN_STRINGOPS)
21455 align = desired_align;
21457 if (alg == libcall)
21458 return false;
21459 gcc_assert (alg != no_stringop);
21460 if (!count)
21461 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
21462 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21463 switch (alg)
21465 case libcall:
21466 case no_stringop:
21467 gcc_unreachable ();
21468 case loop:
21469 need_zero_guard = true;
21470 size_needed = GET_MODE_SIZE (Pmode);
21471 break;
21472 case unrolled_loop:
21473 need_zero_guard = true;
21474 size_needed = GET_MODE_SIZE (Pmode) * 4;
21475 break;
21476 case rep_prefix_8_byte:
21477 size_needed = 8;
21478 break;
21479 case rep_prefix_4_byte:
21480 size_needed = 4;
21481 break;
21482 case rep_prefix_1_byte:
21483 size_needed = 1;
21484 break;
21485 case loop_1_byte:
21486 need_zero_guard = true;
21487 size_needed = 1;
21488 break;
21490 epilogue_size_needed = size_needed;
21492 /* Step 1: Prologue guard. */
21494 /* Alignment code needs count to be in register. */
21495 if (CONST_INT_P (count_exp) && desired_align > align)
21497 if (INTVAL (count_exp) > desired_align
21498 && INTVAL (count_exp) > size_needed)
21500 align_bytes
21501 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21502 if (align_bytes <= 0)
21503 align_bytes = 0;
21504 else
21505 align_bytes = desired_align - align_bytes;
21507 if (align_bytes == 0)
21509 enum machine_mode mode = SImode;
21510 if (TARGET_64BIT && (count & ~0xffffffff))
21511 mode = DImode;
21512 count_exp = force_reg (mode, count_exp);
21515 /* Do the cheap promotion to allow better CSE across the
21516 main loop and epilogue (i.e. one load of the big constant in
21517 front of all the code). */
21518 if (CONST_INT_P (val_exp))
21519 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21520 desired_align, align);
21521 /* Ensure that alignment prologue won't copy past end of block. */
21522 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21524 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21525 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21526 Make sure it is a power of 2. */
21527 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21529 /* To improve performance of small blocks, we jump around the VAL
21530 promoting code.  This means that if the promoted VAL is not constant,
21531 we might not use it in the epilogue and have to fall back to the byte
21532 loop variant. */
21533 if (epilogue_size_needed > 2 && !promoted_val)
21534 force_loopy_epilogue = true;
21535 if (count)
21537 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21539 /* If main algorithm works on QImode, no epilogue is needed.
21540 For small sizes just don't align anything. */
21541 if (size_needed == 1)
21542 desired_align = align;
21543 else
21544 goto epilogue;
21547 else
21549 label = gen_label_rtx ();
21550 emit_cmp_and_jump_insns (count_exp,
21551 GEN_INT (epilogue_size_needed),
21552 LTU, 0, counter_mode (count_exp), 1, label);
21553 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21554 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21555 else
21556 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21559 if (dynamic_check != -1)
21561 rtx hot_label = gen_label_rtx ();
21562 jump_around_label = gen_label_rtx ();
21563 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21564 LEU, 0, counter_mode (count_exp), 1, hot_label);
21565 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21566 set_storage_via_libcall (dst, count_exp, val_exp, false);
21567 emit_jump (jump_around_label);
21568 emit_label (hot_label);
21571 /* Step 2: Alignment prologue. */
21573 /* Do the expensive promotion once we have branched off the small blocks. */
21574 if (!promoted_val)
21575 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21576 desired_align, align);
21577 gcc_assert (desired_align >= 1 && align >= 1);
21579 if (desired_align > align)
21581 if (align_bytes == 0)
21583 /* Except for the first move in the epilogue, we no longer know
21584 the constant offset in the aliasing info.  It doesn't seem worth
21585 the pain to maintain it for the first move, so throw away
21586 the info early. */
21587 dst = change_address (dst, BLKmode, destreg);
21588 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21589 desired_align);
21591 else
21593 /* If we know how many bytes need to be stored before dst is
21594 sufficiently aligned, maintain aliasing info accurately. */
21595 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21596 desired_align, align_bytes);
21597 count_exp = plus_constant (count_exp, -align_bytes);
21598 count -= align_bytes;
21600 if (need_zero_guard
21601 && (count < (unsigned HOST_WIDE_INT) size_needed
21602 || (align_bytes == 0
21603 && count < ((unsigned HOST_WIDE_INT) size_needed
21604 + desired_align - align))))
21606 /* It is possible that we copied enough so the main loop will not
21607 execute. */
21608 gcc_assert (size_needed > 1);
21609 if (label == NULL_RTX)
21610 label = gen_label_rtx ();
21611 emit_cmp_and_jump_insns (count_exp,
21612 GEN_INT (size_needed),
21613 LTU, 0, counter_mode (count_exp), 1, label);
21614 if (expected_size == -1
21615 || expected_size < (desired_align - align) / 2 + size_needed)
21616 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21617 else
21618 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21621 if (label && size_needed == 1)
21623 emit_label (label);
21624 LABEL_NUSES (label) = 1;
21625 label = NULL;
21626 promoted_val = val_exp;
21627 epilogue_size_needed = 1;
21629 else if (label == NULL_RTX)
21630 epilogue_size_needed = size_needed;
21632 /* Step 3: Main loop. */
21634 switch (alg)
21636 case libcall:
21637 case no_stringop:
21638 gcc_unreachable ();
21639 case loop_1_byte:
21640 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21641 count_exp, QImode, 1, expected_size);
21642 break;
21643 case loop:
21644 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21645 count_exp, Pmode, 1, expected_size);
21646 break;
21647 case unrolled_loop:
21648 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21649 count_exp, Pmode, 4, expected_size);
21650 break;
21651 case rep_prefix_8_byte:
21652 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21653 DImode, val_exp);
21654 break;
21655 case rep_prefix_4_byte:
21656 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21657 SImode, val_exp);
21658 break;
21659 case rep_prefix_1_byte:
21660 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21661 QImode, val_exp);
21662 break;
21664 /* Properly adjust the offset of the dst memory for aliasing. */
21665 if (CONST_INT_P (count_exp))
21666 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21667 (count / size_needed) * size_needed);
21668 else
21669 dst = change_address (dst, BLKmode, destreg);
21671 /* Step 4: Epilogue to copy the remaining bytes. */
21673 if (label)
21675 /* When the main loop is done, COUNT_EXP might hold the original count,
21676 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21677 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21678 bytes. Compensate if needed. */
21680 if (size_needed < epilogue_size_needed)
21682 tmp =
21683 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21684 GEN_INT (size_needed - 1), count_exp, 1,
21685 OPTAB_DIRECT);
21686 if (tmp != count_exp)
21687 emit_move_insn (count_exp, tmp);
21689 emit_label (label);
21690 LABEL_NUSES (label) = 1;
21692 epilogue:
21693 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21695 if (force_loopy_epilogue)
21696 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21697 epilogue_size_needed);
21698 else
21699 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21700 epilogue_size_needed);
21702 if (jump_around_label)
21703 emit_label (jump_around_label);
21704 return true;
21707 /* Expand the appropriate insns for doing strlen if not just doing
21708 repnz; scasb
21710 out = result, initialized with the start address
21711 align_rtx = alignment of the address.
21712 scratch = scratch register, initialized with the start address when
21713 not aligned, otherwise undefined
21715 This is just the body. It needs the initializations mentioned above and
21716 some address computing at the end. These things are done in i386.md. */
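/* Roughly: the code below first checks up to three unaligned bytes one
   at a time until OUT reaches a 4-byte boundary, then scans a 32-bit
   word per iteration using the zero-byte test emitted after
   align_4_label, and finally backs OUT up to the exact position of the
   terminating zero.  */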
21718 static void
21719 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21721 int align;
21722 rtx tmp;
21723 rtx align_2_label = NULL_RTX;
21724 rtx align_3_label = NULL_RTX;
21725 rtx align_4_label = gen_label_rtx ();
21726 rtx end_0_label = gen_label_rtx ();
21727 rtx mem;
21728 rtx tmpreg = gen_reg_rtx (SImode);
21729 rtx scratch = gen_reg_rtx (SImode);
21730 rtx cmp;
21732 align = 0;
21733 if (CONST_INT_P (align_rtx))
21734 align = INTVAL (align_rtx);
21736 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21738 /* Is there a known alignment and is it less than 4? */
21739 if (align < 4)
21741 rtx scratch1 = gen_reg_rtx (Pmode);
21742 emit_move_insn (scratch1, out);
21743 /* Is there a known alignment and is it not 2? */
21744 if (align != 2)
21746 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21747 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21749 /* Leave just the 2 lower bits. */
21750 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21751 NULL_RTX, 0, OPTAB_WIDEN);
21753 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21754 Pmode, 1, align_4_label);
21755 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21756 Pmode, 1, align_2_label);
21757 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21758 Pmode, 1, align_3_label);
21760 else
21762 /* Since the alignment is 2, we have to check 2 or 0 bytes;
21763 check whether it is aligned to a 4-byte boundary. */
21765 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21766 NULL_RTX, 0, OPTAB_WIDEN);
21768 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21769 Pmode, 1, align_4_label);
21772 mem = change_address (src, QImode, out);
21774 /* Now compare the bytes. */
21776 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
21777 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21778 QImode, 1, end_0_label);
21780 /* Increment the address. */
21781 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21783 /* Not needed with an alignment of 2 */
21784 if (align != 2)
21786 emit_label (align_2_label);
21788 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21789 end_0_label);
21791 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21793 emit_label (align_3_label);
21796 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21797 end_0_label);
21799 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21802 /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
21803 align this loop; doing so only bloats the code and does not help to
21804 speed it up. */
21805 emit_label (align_4_label);
21807 mem = change_address (src, SImode, out);
21808 emit_move_insn (scratch, mem);
21809 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
21811 /* This formula yields a nonzero result iff one of the bytes is zero.
21812 This saves three branches inside the loop and many cycles. */
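/* Concretely, tmpreg = (scratch - 0x01010101) & ~scratch & 0x80808080.
   For example, scratch == 0x41420043 (a zero in its second-lowest byte)
   gives 0x4040ff42 & 0xbebdffbc & 0x80808080 == 0x00008000, so the loop
   exits; a word with no zero byte always yields 0.  */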
21814 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21815 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21816 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21817 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21818 gen_int_mode (0x80808080, SImode)));
21819 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21820 align_4_label);
21822 if (TARGET_CMOVE)
21824 rtx reg = gen_reg_rtx (SImode);
21825 rtx reg2 = gen_reg_rtx (Pmode);
21826 emit_move_insn (reg, tmpreg);
21827 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21829 /* If zero is not in the first two bytes, move two bytes forward. */
21830 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21831 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21832 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21833 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21834 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21835 reg,
21836 tmpreg)));
21837 /* Emit lea manually to avoid clobbering of flags. */
21838 emit_insn (gen_rtx_SET (SImode, reg2,
21839 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21841 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21842 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21843 emit_insn (gen_rtx_SET (VOIDmode, out,
21844 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21845 reg2,
21846 out)));
21848 else
21850 rtx end_2_label = gen_label_rtx ();
21851 /* Is zero in the first two bytes? */
21853 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21854 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21855 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21856 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21857 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21858 pc_rtx);
21859 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21860 JUMP_LABEL (tmp) = end_2_label;
21862 /* Not in the first two. Move two bytes forward. */
21863 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21864 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21866 emit_label (end_2_label);
21870 /* Avoid branch in fixing the byte. */
21871 tmpreg = gen_lowpart (QImode, tmpreg);
21872 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21873 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21874 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21875 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21877 emit_label (end_0_label);
21880 /* Expand strlen. */
21882 bool
21883 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21885 rtx addr, scratch1, scratch2, scratch3, scratch4;
21887 /* The generic case of the strlen expander is long.  Avoid its
21888 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
21890 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21891 && !TARGET_INLINE_ALL_STRINGOPS
21892 && !optimize_insn_for_size_p ()
21893 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21894 return false;
21896 addr = force_reg (Pmode, XEXP (src, 0));
21897 scratch1 = gen_reg_rtx (Pmode);
21899 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21900 && !optimize_insn_for_size_p ())
21902 /* Well, it seems that some optimizer does not combine a call like
21903 foo(strlen(bar), strlen(bar));
21904 when the move and the subtraction are done here.  It does calculate
21905 the length just once when these instructions are done inside of
21906 output_strlen_unroll().  But since &bar[strlen(bar)] is
21907 often used and this uses one fewer register for the lifetime of
21908 output_strlen_unroll(), this is better. */
21910 emit_move_insn (out, addr);
21912 ix86_expand_strlensi_unroll_1 (out, src, align);
21914 /* strlensi_unroll_1 returns the address of the zero at the end of
21915 the string, like memchr(), so compute the length by subtracting
21916 the start address. */
21917 emit_insn (ix86_gen_sub3 (out, out, addr));
21919 else
21921 rtx unspec;
21923 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21924 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21925 return false;
21927 scratch2 = gen_reg_rtx (Pmode);
21928 scratch3 = gen_reg_rtx (Pmode);
21929 scratch4 = force_reg (Pmode, constm1_rtx);
21931 emit_move_insn (scratch3, addr);
21932 eoschar = force_reg (QImode, eoschar);
21934 src = replace_equiv_address_nv (src, scratch3);
21936 /* If .md starts supporting :P, this can be done in .md. */
21937 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21938 scratch4), UNSPEC_SCAS);
21939 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21940 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21941 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
21943 return true;
21946 /* For a given symbol (function), construct code to compute the address of its
21947 PLT entry in the large x86-64 PIC model. */
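/* The emitted sequence loads the constant symbol@PLTOFF into a fresh
   register and adds pic_offset_table_rtx, so the PLT entry remains
   reachable even when it is more than 2GB away from the code, which the
   large code model allows.  */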
21949 construct_plt_address (rtx symbol)
21951 rtx tmp = gen_reg_rtx (Pmode);
21952 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21954 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21955 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21957 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
21958 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
21959 return tmp;
21963 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
21964 rtx callarg2,
21965 rtx pop, bool sibcall)
21967 rtx use = NULL, call;
21969 if (pop == const0_rtx)
21970 pop = NULL;
21971 gcc_assert (!TARGET_64BIT || !pop);
21973 if (TARGET_MACHO && !TARGET_64BIT)
21975 #if TARGET_MACHO
21976 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21977 fnaddr = machopic_indirect_call_target (fnaddr);
21978 #endif
21980 else
21982 /* Static functions and indirect calls don't need the pic register. */
21983 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21984 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21985 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21986 use_reg (&use, pic_offset_table_rtx);
21989 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21991 rtx al = gen_rtx_REG (QImode, AX_REG);
21992 emit_move_insn (al, callarg2);
21993 use_reg (&use, al);
21996 if (ix86_cmodel == CM_LARGE_PIC
21997 && MEM_P (fnaddr)
21998 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21999 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
22000 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
22001 else if (sibcall
22002 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
22003 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
22005 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
22006 fnaddr = gen_rtx_MEM (QImode, fnaddr);
22009 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
22010 if (retval)
22011 call = gen_rtx_SET (VOIDmode, retval, call);
22012 if (pop)
22014 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
22015 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
22016 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
22018 if (TARGET_64BIT_MS_ABI
22019 && (!callarg2 || INTVAL (callarg2) != -2))
22021 /* We need to represent that SI and DI registers are clobbered
22022 by SYSV calls. */
22023 static int clobbered_registers[] = {
22024 XMM6_REG, XMM7_REG, XMM8_REG,
22025 XMM9_REG, XMM10_REG, XMM11_REG,
22026 XMM12_REG, XMM13_REG, XMM14_REG,
22027 XMM15_REG, SI_REG, DI_REG
22029 unsigned int i;
22030 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
22031 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
22032 UNSPEC_MS_TO_SYSV_CALL);
22034 vec[0] = call;
22035 vec[1] = unspec;
22036 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
22037 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
22038 ? TImode : DImode,
22039 gen_rtx_REG
22040 (SSE_REGNO_P (clobbered_registers[i])
22041 ? TImode : DImode,
22042 clobbered_registers[i]));
22044 call = gen_rtx_PARALLEL (VOIDmode,
22045 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
22046 + 2, vec));
22049 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
22050 if (TARGET_VZEROUPPER)
22052 rtx unspec;
22053 int avx256;
22055 if (cfun->machine->callee_pass_avx256_p)
22057 if (cfun->machine->callee_return_avx256_p)
22058 avx256 = callee_return_pass_avx256;
22059 else
22060 avx256 = callee_pass_avx256;
22062 else if (cfun->machine->callee_return_avx256_p)
22063 avx256 = callee_return_avx256;
22064 else
22065 avx256 = call_no_avx256;
22067 if (reload_completed)
22068 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
22069 else
22071 unspec = gen_rtx_UNSPEC (VOIDmode,
22072 gen_rtvec (1, GEN_INT (avx256)),
22073 UNSPEC_CALL_NEEDS_VZEROUPPER);
22074 call = gen_rtx_PARALLEL (VOIDmode,
22075 gen_rtvec (2, call, unspec));
22079 call = emit_call_insn (call);
22080 if (use)
22081 CALL_INSN_FUNCTION_USAGE (call) = use;
22083 return call;
22086 void
22087 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
22089 rtx call = XVECEXP (PATTERN (insn), 0, 0);
22090 emit_insn (gen_avx_vzeroupper (vzeroupper));
22091 emit_call_insn (call);
22094 /* Output the assembly for a call instruction. */
22096 const char *
22097 ix86_output_call_insn (rtx insn, rtx call_op)
22099 bool direct_p = constant_call_address_operand (call_op, Pmode);
22100 bool seh_nop_p = false;
22101 const char *xasm;
22103 if (SIBLING_CALL_P (insn))
22105 if (direct_p)
22106 xasm = "jmp\t%P0";
22107 /* SEH epilogue detection requires the indirect branch case
22108 to include REX.W. */
22109 else if (TARGET_SEH)
22110 xasm = "rex.W jmp %A0";
22111 else
22112 xasm = "jmp\t%A0";
22114 output_asm_insn (xasm, &call_op);
22115 return "";
22118 /* SEH unwinding can require an extra nop to be emitted in several
22119 circumstances. Determine if we have one of those. */
22120 if (TARGET_SEH)
22122 rtx i;
22124 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
22126 /* If we get to another real insn, we don't need the nop. */
22127 if (INSN_P (i))
22128 break;
22130 /* If we get to the epilogue note, prevent a catch region from
22131 being adjacent to the standard epilogue sequence.  If non-call
22132 exceptions are enabled, we'll have done this during epilogue emission. */
22133 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
22134 && !flag_non_call_exceptions
22135 && !can_throw_internal (insn))
22137 seh_nop_p = true;
22138 break;
22142 /* If we didn't find a real insn following the call, prevent the
22143 unwinder from looking into the next function. */
22144 if (i == NULL)
22145 seh_nop_p = true;
22148 if (direct_p)
22149 xasm = "call\t%P0";
22150 else
22151 xasm = "call\t%A0";
22153 output_asm_insn (xasm, &call_op);
22155 if (seh_nop_p)
22156 return "nop";
22158 return "";
22161 /* Clear stack slot assignments remembered from previous functions.
22162 This is called from INIT_EXPANDERS once before RTL is emitted for each
22163 function. */
22165 static struct machine_function *
22166 ix86_init_machine_status (void)
22168 struct machine_function *f;
22170 f = ggc_alloc_cleared_machine_function ();
22171 f->use_fast_prologue_epilogue_nregs = -1;
22172 f->tls_descriptor_call_expanded_p = 0;
22173 f->call_abi = ix86_abi;
22175 return f;
22178 /* Return a MEM corresponding to a stack slot with mode MODE.
22179 Allocate a new slot if necessary.
22181 The RTL for a function can have several slots available: N is
22182 which slot to use. */
22185 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
22187 struct stack_local_entry *s;
22189 gcc_assert (n < MAX_386_STACK_LOCALS);
22191 /* Virtual slot is valid only before vregs are instantiated. */
22192 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
22194 for (s = ix86_stack_locals; s; s = s->next)
22195 if (s->mode == mode && s->n == n)
22196 return copy_rtx (s->rtl);
22198 s = ggc_alloc_stack_local_entry ();
22199 s->n = n;
22200 s->mode = mode;
22201 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
22203 s->next = ix86_stack_locals;
22204 ix86_stack_locals = s;
22205 return s->rtl;
22208 /* Calculate the length of the memory address in the instruction
22209 encoding. Does not include the one-byte modrm, opcode, or prefix. */
22212 memory_address_length (rtx addr)
22214 struct ix86_address parts;
22215 rtx base, index, disp;
22216 int len;
22217 int ok;
22219 if (GET_CODE (addr) == PRE_DEC
22220 || GET_CODE (addr) == POST_INC
22221 || GET_CODE (addr) == PRE_MODIFY
22222 || GET_CODE (addr) == POST_MODIFY)
22223 return 0;
22225 ok = ix86_decompose_address (addr, &parts);
22226 gcc_assert (ok);
22228 if (parts.base && GET_CODE (parts.base) == SUBREG)
22229 parts.base = SUBREG_REG (parts.base);
22230 if (parts.index && GET_CODE (parts.index) == SUBREG)
22231 parts.index = SUBREG_REG (parts.index);
22233 base = parts.base;
22234 index = parts.index;
22235 disp = parts.disp;
22236 len = 0;
22238 /* Rule of thumb:
22239 - esp as the base always wants an index,
22240 - ebp as the base always wants a displacement,
22241 - r12 as the base always wants an index,
22242 - r13 as the base always wants a displacement. */
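/* For example (the lengths exclude the modrm byte itself): (%eax) adds
   0 bytes, (%esp) needs a SIB byte and adds 1, 8(%ebp) fits in a disp8
   and adds 1, symbol(,%eax,4) needs a 4-byte displacement plus a SIB
   byte and adds 5, and an %fs/%gs segment override adds one more byte.  */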
22244 /* Register Indirect. */
22245 if (base && !index && !disp)
22247 /* esp (for its index) and ebp (for its displacement) need
22248 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
22249 code. */
22250 if (REG_P (addr)
22251 && (addr == arg_pointer_rtx
22252 || addr == frame_pointer_rtx
22253 || REGNO (addr) == SP_REG
22254 || REGNO (addr) == BP_REG
22255 || REGNO (addr) == R12_REG
22256 || REGNO (addr) == R13_REG))
22257 len = 1;
22260 /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
22261 is not disp32, but disp32(%rip), so for disp32
22262 a SIB byte is needed, unless print_operand_address
22263 optimizes it into disp32(%rip) or (%rip) is implied
22264 by an UNSPEC. */
22265 else if (disp && !base && !index)
22267 len = 4;
22268 if (TARGET_64BIT)
22270 rtx symbol = disp;
22272 if (GET_CODE (disp) == CONST)
22273 symbol = XEXP (disp, 0);
22274 if (GET_CODE (symbol) == PLUS
22275 && CONST_INT_P (XEXP (symbol, 1)))
22276 symbol = XEXP (symbol, 0);
22278 if (GET_CODE (symbol) != LABEL_REF
22279 && (GET_CODE (symbol) != SYMBOL_REF
22280 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
22281 && (GET_CODE (symbol) != UNSPEC
22282 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
22283 && XINT (symbol, 1) != UNSPEC_PCREL
22284 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
22285 len += 1;
22289 else
22291 /* Find the length of the displacement constant. */
22292 if (disp)
22294 if (base && satisfies_constraint_K (disp))
22295 len = 1;
22296 else
22297 len = 4;
22299 /* ebp always wants a displacement. Similarly r13. */
22300 else if (base && REG_P (base)
22301 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
22302 len = 1;
22304 /* An index requires the two-byte modrm form.... */
22305 if (index
22306 /* ...like esp (or r12), which always wants an index. */
22307 || base == arg_pointer_rtx
22308 || base == frame_pointer_rtx
22309 || (base && REG_P (base)
22310 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
22311 len += 1;
22314 switch (parts.seg)
22316 case SEG_FS:
22317 case SEG_GS:
22318 len += 1;
22319 break;
22320 default:
22321 break;
22324 return len;
22327 /* Compute the default value for the "length_immediate" attribute.  When SHORTFORM
22328 is set, expect that the insn has an 8-bit immediate alternative. */
22330 ix86_attr_length_immediate_default (rtx insn, bool shortform)
22332 int len = 0;
22333 int i;
22334 extract_insn_cached (insn);
22335 for (i = recog_data.n_operands - 1; i >= 0; --i)
22336 if (CONSTANT_P (recog_data.operand[i]))
22338 enum attr_mode mode = get_attr_mode (insn);
22340 gcc_assert (!len);
22341 if (shortform && CONST_INT_P (recog_data.operand[i]))
22343 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
22344 switch (mode)
22346 case MODE_QI:
22347 len = 1;
22348 continue;
22349 case MODE_HI:
22350 ival = trunc_int_for_mode (ival, HImode);
22351 break;
22352 case MODE_SI:
22353 ival = trunc_int_for_mode (ival, SImode);
22354 break;
22355 default:
22356 break;
22358 if (IN_RANGE (ival, -128, 127))
22360 len = 1;
22361 continue;
22364 switch (mode)
22366 case MODE_QI:
22367 len = 1;
22368 break;
22369 case MODE_HI:
22370 len = 2;
22371 break;
22372 case MODE_SI:
22373 len = 4;
22374 break;
22375 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
22376 case MODE_DI:
22377 len = 4;
22378 break;
22379 default:
22380 fatal_insn ("unknown insn mode", insn);
22383 return len;
22385 /* Compute default value for "length_address" attribute. */
22387 ix86_attr_length_address_default (rtx insn)
22389 int i;
22391 if (get_attr_type (insn) == TYPE_LEA)
22393 rtx set = PATTERN (insn), addr;
22395 if (GET_CODE (set) == PARALLEL)
22396 set = XVECEXP (set, 0, 0);
22398 gcc_assert (GET_CODE (set) == SET);
22400 addr = SET_SRC (set);
22401 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
22403 if (GET_CODE (addr) == ZERO_EXTEND)
22404 addr = XEXP (addr, 0);
22405 if (GET_CODE (addr) == SUBREG)
22406 addr = SUBREG_REG (addr);
22409 return memory_address_length (addr);
22412 extract_insn_cached (insn);
22413 for (i = recog_data.n_operands - 1; i >= 0; --i)
22414 if (MEM_P (recog_data.operand[i]))
22416 constrain_operands_cached (reload_completed);
22417 if (which_alternative != -1)
22419 const char *constraints = recog_data.constraints[i];
22420 int alt = which_alternative;
22422 while (*constraints == '=' || *constraints == '+')
22423 constraints++;
22424 while (alt-- > 0)
22425 while (*constraints++ != ',')
22427 /* Skip ignored operands. */
22428 if (*constraints == 'X')
22429 continue;
22431 return memory_address_length (XEXP (recog_data.operand[i], 0));
22433 return 0;
22436 /* Compute the default value for the "length_vex" attribute.  It includes
22437 the 2- or 3-byte VEX prefix and 1 opcode byte. */
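/* For example, a VEX insn using the 0f escape with no VEX.W, no DImode
   general-register operand and no extended register mentioned in a
   memory operand can use the 2-byte prefix, so the value is 3; anything
   needing the 3-byte prefix (VEX.W, a 0f38/0f3a escape, or the
   REX.X/REX.B information) gets 4.  */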
22440 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
22442 int i;
22444 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX.W bit
22445 requires the 3-byte VEX prefix. */
22446 if (!has_0f_opcode || has_vex_w)
22447 return 3 + 1;
22449 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
22450 if (!TARGET_64BIT)
22451 return 2 + 1;
22453 extract_insn_cached (insn);
22455 for (i = recog_data.n_operands - 1; i >= 0; --i)
22456 if (REG_P (recog_data.operand[i]))
22458 /* REX.W bit uses 3 byte VEX prefix. */
22459 if (GET_MODE (recog_data.operand[i]) == DImode
22460 && GENERAL_REG_P (recog_data.operand[i]))
22461 return 3 + 1;
22463 else
22465 /* REX.X or REX.B bits use 3 byte VEX prefix. */
22466 if (MEM_P (recog_data.operand[i])
22467 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
22468 return 3 + 1;
22471 return 2 + 1;
22474 /* Return the maximum number of instructions a cpu can issue. */
22476 static int
22477 ix86_issue_rate (void)
22479 switch (ix86_tune)
22481 case PROCESSOR_PENTIUM:
22482 case PROCESSOR_ATOM:
22483 case PROCESSOR_K6:
22484 return 2;
22486 case PROCESSOR_PENTIUMPRO:
22487 case PROCESSOR_PENTIUM4:
22488 case PROCESSOR_CORE2_32:
22489 case PROCESSOR_CORE2_64:
22490 case PROCESSOR_COREI7_32:
22491 case PROCESSOR_COREI7_64:
22492 case PROCESSOR_ATHLON:
22493 case PROCESSOR_K8:
22494 case PROCESSOR_AMDFAM10:
22495 case PROCESSOR_NOCONA:
22496 case PROCESSOR_GENERIC32:
22497 case PROCESSOR_GENERIC64:
22498 case PROCESSOR_BDVER1:
22499 case PROCESSOR_BTVER1:
22500 return 3;
22502 default:
22503 return 1;
22507 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
22508 by DEP_INSN and nothing else set by DEP_INSN. */
22510 static bool
22511 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22513 rtx set, set2;
22515 /* Simplify the test for uninteresting insns. */
22516 if (insn_type != TYPE_SETCC
22517 && insn_type != TYPE_ICMOV
22518 && insn_type != TYPE_FCMOV
22519 && insn_type != TYPE_IBR)
22520 return false;
22522 if ((set = single_set (dep_insn)) != 0)
22524 set = SET_DEST (set);
22525 set2 = NULL_RTX;
22527 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22528 && XVECLEN (PATTERN (dep_insn), 0) == 2
22529 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22530 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22532 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22533 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22535 else
22536 return false;
22538 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22539 return false;
22541 /* This test is true if the dependent insn reads the flags but
22542 not any other potentially set register. */
22543 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22544 return false;
22546 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
22547 return false;
22549 return true;
22552 /* Return true iff USE_INSN has a memory address with operands set by
22553 SET_INSN. */
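/* For example, a load from (%eax) issued right after an instruction that
   writes %eax is AGI dependent; on Pentium, ix86_adjust_cost below adds
   one cycle of latency for this case.  */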
22555 bool
22556 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22558 int i;
22559 extract_insn_cached (use_insn);
22560 for (i = recog_data.n_operands - 1; i >= 0; --i)
22561 if (MEM_P (recog_data.operand[i]))
22563 rtx addr = XEXP (recog_data.operand[i], 0);
22564 return modified_in_p (addr, set_insn) != 0;
22566 return false;
22569 static int
22570 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22572 enum attr_type insn_type, dep_insn_type;
22573 enum attr_memory memory;
22574 rtx set, set2;
22575 int dep_insn_code_number;
22577 /* Anti and output dependencies have zero cost on all CPUs. */
22578 if (REG_NOTE_KIND (link) != 0)
22579 return 0;
22581 dep_insn_code_number = recog_memoized (dep_insn);
22583 /* If we can't recognize the insns, we can't really do anything. */
22584 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22585 return cost;
22587 insn_type = get_attr_type (insn);
22588 dep_insn_type = get_attr_type (dep_insn);
22590 switch (ix86_tune)
22592 case PROCESSOR_PENTIUM:
22593 /* Address Generation Interlock adds a cycle of latency. */
22594 if (insn_type == TYPE_LEA)
22596 rtx addr = PATTERN (insn);
22598 if (GET_CODE (addr) == PARALLEL)
22599 addr = XVECEXP (addr, 0, 0);
22601 gcc_assert (GET_CODE (addr) == SET);
22603 addr = SET_SRC (addr);
22604 if (modified_in_p (addr, dep_insn))
22605 cost += 1;
22607 else if (ix86_agi_dependent (dep_insn, insn))
22608 cost += 1;
22610 /* ??? Compares pair with jump/setcc. */
22611 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22612 cost = 0;
22614 /* Floating point stores require value to be ready one cycle earlier. */
22615 if (insn_type == TYPE_FMOV
22616 && get_attr_memory (insn) == MEMORY_STORE
22617 && !ix86_agi_dependent (dep_insn, insn))
22618 cost += 1;
22619 break;
22621 case PROCESSOR_PENTIUMPRO:
22622 memory = get_attr_memory (insn);
22624 /* INT->FP conversion is expensive. */
22625 if (get_attr_fp_int_src (dep_insn))
22626 cost += 5;
22628 /* There is one cycle extra latency between an FP op and a store. */
22629 if (insn_type == TYPE_FMOV
22630 && (set = single_set (dep_insn)) != NULL_RTX
22631 && (set2 = single_set (insn)) != NULL_RTX
22632 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22633 && MEM_P (SET_DEST (set2)))
22634 cost += 1;
22636 /* Show ability of reorder buffer to hide latency of load by executing
22637 in parallel with previous instruction in case
22638 previous instruction is not needed to compute the address. */
22639 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22640 && !ix86_agi_dependent (dep_insn, insn))
22642 /* Claim moves to take one cycle, as the core can issue one load
22643 at a time and the next load can start a cycle later. */
22644 if (dep_insn_type == TYPE_IMOV
22645 || dep_insn_type == TYPE_FMOV)
22646 cost = 1;
22647 else if (cost > 1)
22648 cost--;
22650 break;
22652 case PROCESSOR_K6:
22653 memory = get_attr_memory (insn);
22655 /* The esp dependency is resolved before the instruction is really
22656 finished. */
22657 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22658 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22659 return 1;
22661 /* INT->FP conversion is expensive. */
22662 if (get_attr_fp_int_src (dep_insn))
22663 cost += 5;
22665 /* Show ability of reorder buffer to hide latency of load by executing
22666 in parallel with previous instruction in case
22667 previous instruction is not needed to compute the address. */
22668 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22669 && !ix86_agi_dependent (dep_insn, insn))
22671 /* Claim moves to take one cycle, as the core can issue one load
22672 at a time and the next load can start a cycle later. */
22673 if (dep_insn_type == TYPE_IMOV
22674 || dep_insn_type == TYPE_FMOV)
22675 cost = 1;
22676 else if (cost > 2)
22677 cost -= 2;
22678 else
22679 cost = 1;
22681 break;
22683 case PROCESSOR_ATHLON:
22684 case PROCESSOR_K8:
22685 case PROCESSOR_AMDFAM10:
22686 case PROCESSOR_BDVER1:
22687 case PROCESSOR_BTVER1:
22688 case PROCESSOR_ATOM:
22689 case PROCESSOR_GENERIC32:
22690 case PROCESSOR_GENERIC64:
22691 memory = get_attr_memory (insn);
22693 /* Show ability of reorder buffer to hide latency of load by executing
22694 in parallel with previous instruction in case
22695 previous instruction is not needed to compute the address. */
22696 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22697 && !ix86_agi_dependent (dep_insn, insn))
22699 enum attr_unit unit = get_attr_unit (insn);
22700 int loadcost = 3;
22702 /* Because of the difference between the length of the integer and
22703 floating unit pipeline preparation stages, the memory operands
22704 for floating point are cheaper.
22706 ??? For Athlon the difference is most probably 2. */
22707 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22708 loadcost = 3;
22709 else
22710 loadcost = TARGET_ATHLON ? 2 : 0;
22712 if (cost >= loadcost)
22713 cost -= loadcost;
22714 else
22715 cost = 0;
22718 default:
22719 break;
22722 return cost;
22725 /* How many alternative schedules to try.  This should be as wide as the
22726 scheduling freedom in the DFA, but no wider.  Making this value too
22727 large results in extra work for the scheduler. */
22729 static int
22730 ia32_multipass_dfa_lookahead (void)
22732 switch (ix86_tune)
22734 case PROCESSOR_PENTIUM:
22735 return 2;
22737 case PROCESSOR_PENTIUMPRO:
22738 case PROCESSOR_K6:
22739 return 1;
22741 case PROCESSOR_CORE2_32:
22742 case PROCESSOR_CORE2_64:
22743 case PROCESSOR_COREI7_32:
22744 case PROCESSOR_COREI7_64:
22745 /* Generally, we want haifa-sched:max_issue() to look ahead as far
22746 as the number of instructions that can be executed in a cycle, i.e.,
22747 issue_rate. I wonder why tuning for many CPUs does not do this. */
22748 return ix86_issue_rate ();
22750 default:
22751 return 0;
22757 /* Model the decoder of Core 2/i7.
22758 The hooks below, used for multipass scheduling (see haifa-sched.c:max_issue),
22759 track the instruction fetch block boundaries and make sure that long
22760 (9+ byte) instructions are assigned to D0. */
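/* With the parameter values set in ix86_sched_init_global below (16-byte
   fetch block, at most 6 insns per block, 8-byte limit for the secondary
   decoders), an insn of 9 or more bytes is only accepted as the first
   insn of a cycle, and further insns are filtered out once the block's
   16 bytes or 6 insn slots are used up.  */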
22762 /* Maximum length of an insn that can be handled by
22763 a secondary decoder unit. '8' for Core 2/i7. */
22764 static int core2i7_secondary_decoder_max_insn_size;
22766 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22767 '16' for Core 2/i7. */
22768 static int core2i7_ifetch_block_size;
22770 /* Maximum number of instructions decoder can handle per cycle.
22771 '6' for Core 2/i7. */
22772 static int core2i7_ifetch_block_max_insns;
22774 typedef struct ix86_first_cycle_multipass_data_ *
22775 ix86_first_cycle_multipass_data_t;
22776 typedef const struct ix86_first_cycle_multipass_data_ *
22777 const_ix86_first_cycle_multipass_data_t;
22779 /* A variable to store target state across calls to max_issue within
22780 one cycle. */
22781 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22782 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22784 /* Initialize DATA. */
22785 static void
22786 core2i7_first_cycle_multipass_init (void *_data)
22788 ix86_first_cycle_multipass_data_t data
22789 = (ix86_first_cycle_multipass_data_t) _data;
22791 data->ifetch_block_len = 0;
22792 data->ifetch_block_n_insns = 0;
22793 data->ready_try_change = NULL;
22794 data->ready_try_change_size = 0;
22797 /* Advancing the cycle; reset ifetch block counts. */
22798 static void
22799 core2i7_dfa_post_advance_cycle (void)
22801 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22803 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22805 data->ifetch_block_len = 0;
22806 data->ifetch_block_n_insns = 0;
22809 static int min_insn_size (rtx);
22811 /* Filter out insns from ready_try that the core will not be able to issue
22812 on current cycle due to decoder. */
22813 static void
22814 core2i7_first_cycle_multipass_filter_ready_try
22815 (const_ix86_first_cycle_multipass_data_t data,
22816 char *ready_try, int n_ready, bool first_cycle_insn_p)
22818 while (n_ready--)
22820 rtx insn;
22821 int insn_size;
22823 if (ready_try[n_ready])
22824 continue;
22826 insn = get_ready_element (n_ready);
22827 insn_size = min_insn_size (insn);
22829 if (/* If this insn is too long for a secondary decoder ... */
22830 (!first_cycle_insn_p
22831 && insn_size > core2i7_secondary_decoder_max_insn_size)
22832 /* ... or it would not fit into the ifetch block ... */
22833 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22834 /* ... or the decoder is full already ... */
22835 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22836 /* ... mask the insn out. */
22838 ready_try[n_ready] = 1;
22840 if (data->ready_try_change)
22841 SET_BIT (data->ready_try_change, n_ready);
22846 /* Prepare for a new round of multipass lookahead scheduling. */
22847 static void
22848 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22849 bool first_cycle_insn_p)
22851 ix86_first_cycle_multipass_data_t data
22852 = (ix86_first_cycle_multipass_data_t) _data;
22853 const_ix86_first_cycle_multipass_data_t prev_data
22854 = ix86_first_cycle_multipass_data;
22856 /* Restore the state from the end of the previous round. */
22857 data->ifetch_block_len = prev_data->ifetch_block_len;
22858 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22860 /* Filter instructions that cannot be issued on current cycle due to
22861 decoder restrictions. */
22862 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22863 first_cycle_insn_p);
22866 /* INSN is being issued in current solution. Account for its impact on
22867 the decoder model. */
22868 static void
22869 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22870 rtx insn, const void *_prev_data)
22872 ix86_first_cycle_multipass_data_t data
22873 = (ix86_first_cycle_multipass_data_t) _data;
22874 const_ix86_first_cycle_multipass_data_t prev_data
22875 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22877 int insn_size = min_insn_size (insn);
22879 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22880 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22881 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22882 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22884 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22885 if (!data->ready_try_change)
22887 data->ready_try_change = sbitmap_alloc (n_ready);
22888 data->ready_try_change_size = n_ready;
22890 else if (data->ready_try_change_size < n_ready)
22892 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22893 n_ready, 0);
22894 data->ready_try_change_size = n_ready;
22896 sbitmap_zero (data->ready_try_change);
22898 /* Filter out insns from ready_try that the core will not be able to issue
22899 on current cycle due to decoder. */
22900 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22901 false);
22904 /* Revert the effect on ready_try. */
22905 static void
22906 core2i7_first_cycle_multipass_backtrack (const void *_data,
22907 char *ready_try,
22908 int n_ready ATTRIBUTE_UNUSED)
22910 const_ix86_first_cycle_multipass_data_t data
22911 = (const_ix86_first_cycle_multipass_data_t) _data;
22912 unsigned int i = 0;
22913 sbitmap_iterator sbi;
22915 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22916 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22918 ready_try[i] = 0;
22922 /* Save the result of multipass lookahead scheduling for the next round. */
22923 static void
22924 core2i7_first_cycle_multipass_end (const void *_data)
22926 const_ix86_first_cycle_multipass_data_t data
22927 = (const_ix86_first_cycle_multipass_data_t) _data;
22928 ix86_first_cycle_multipass_data_t next_data
22929 = ix86_first_cycle_multipass_data;
22931 if (data != NULL)
22933 next_data->ifetch_block_len = data->ifetch_block_len;
22934 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
22938 /* Deallocate target data. */
22939 static void
22940 core2i7_first_cycle_multipass_fini (void *_data)
22942 ix86_first_cycle_multipass_data_t data
22943 = (ix86_first_cycle_multipass_data_t) _data;
22945 if (data->ready_try_change)
22947 sbitmap_free (data->ready_try_change);
22948 data->ready_try_change = NULL;
22949 data->ready_try_change_size = 0;
22953 /* Prepare for scheduling pass. */
22954 static void
22955 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22956 int verbose ATTRIBUTE_UNUSED,
22957 int max_uid ATTRIBUTE_UNUSED)
22959 /* Install scheduling hooks for current CPU. Some of these hooks are used
22960 in time-critical parts of the scheduler, so we only set them up when
22961 they are actually used. */
22962 switch (ix86_tune)
22964 case PROCESSOR_CORE2_32:
22965 case PROCESSOR_CORE2_64:
22966 case PROCESSOR_COREI7_32:
22967 case PROCESSOR_COREI7_64:
22968 targetm.sched.dfa_post_advance_cycle
22969 = core2i7_dfa_post_advance_cycle;
22970 targetm.sched.first_cycle_multipass_init
22971 = core2i7_first_cycle_multipass_init;
22972 targetm.sched.first_cycle_multipass_begin
22973 = core2i7_first_cycle_multipass_begin;
22974 targetm.sched.first_cycle_multipass_issue
22975 = core2i7_first_cycle_multipass_issue;
22976 targetm.sched.first_cycle_multipass_backtrack
22977 = core2i7_first_cycle_multipass_backtrack;
22978 targetm.sched.first_cycle_multipass_end
22979 = core2i7_first_cycle_multipass_end;
22980 targetm.sched.first_cycle_multipass_fini
22981 = core2i7_first_cycle_multipass_fini;
22983 /* Set decoder parameters. */
22984 core2i7_secondary_decoder_max_insn_size = 8;
22985 core2i7_ifetch_block_size = 16;
22986 core2i7_ifetch_block_max_insns = 6;
22987 break;
22989 default:
22990 targetm.sched.dfa_post_advance_cycle = NULL;
22991 targetm.sched.first_cycle_multipass_init = NULL;
22992 targetm.sched.first_cycle_multipass_begin = NULL;
22993 targetm.sched.first_cycle_multipass_issue = NULL;
22994 targetm.sched.first_cycle_multipass_backtrack = NULL;
22995 targetm.sched.first_cycle_multipass_end = NULL;
22996 targetm.sched.first_cycle_multipass_fini = NULL;
22997 break;
23002 /* Compute the alignment given to a constant that is being placed in memory.
23003 EXP is the constant and ALIGN is the alignment that the object would
23004 ordinarily have.
23005 The value of this function is used instead of that alignment to align
23006 the object. */
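/* For example, a DFmode constant is given at least 64-bit alignment,
   constants whose mode satisfies ALIGN_MODE_128 get 128 bits, and string
   literals of 31 or more characters are word-aligned unless optimizing
   for size.  */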
23009 ix86_constant_alignment (tree exp, int align)
23011 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
23012 || TREE_CODE (exp) == INTEGER_CST)
23014 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
23015 return 64;
23016 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
23017 return 128;
23019 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
23020 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
23021 return BITS_PER_WORD;
23023 return align;
23026 /* Compute the alignment for a static variable.
23027 TYPE is the data type, and ALIGN is the alignment that
23028 the object would ordinarily have. The value of this function is used
23029 instead of that alignment to align the object. */
23032 ix86_data_alignment (tree type, int align)
23034 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
23036 if (AGGREGATE_TYPE_P (type)
23037 && TYPE_SIZE (type)
23038 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23039 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
23040 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
23041 && align < max_align)
23042 align = max_align;
23044 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
23045 to a 16-byte boundary. */
23046 if (TARGET_64BIT)
23048 if (AGGREGATE_TYPE_P (type)
23049 && TYPE_SIZE (type)
23050 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23051 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
23052 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23053 return 128;
23056 if (TREE_CODE (type) == ARRAY_TYPE)
23058 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23059 return 64;
23060 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23061 return 128;
23063 else if (TREE_CODE (type) == COMPLEX_TYPE)
23066 if (TYPE_MODE (type) == DCmode && align < 64)
23067 return 64;
23068 if ((TYPE_MODE (type) == XCmode
23069 || TYPE_MODE (type) == TCmode) && align < 128)
23070 return 128;
23072 else if ((TREE_CODE (type) == RECORD_TYPE
23073 || TREE_CODE (type) == UNION_TYPE
23074 || TREE_CODE (type) == QUAL_UNION_TYPE)
23075 && TYPE_FIELDS (type))
23077 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23078 return 64;
23079 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23080 return 128;
23082 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23083 || TREE_CODE (type) == INTEGER_TYPE)
23085 if (TYPE_MODE (type) == DFmode && align < 64)
23086 return 64;
23087 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
23088 return 128;
23091 return align;
23094 /* Compute the alignment for a local variable or a stack slot. EXP is
23095 the data type or decl itself, MODE is the widest mode available and
23096 ALIGN is the alignment that the object would ordinarily have. The
23097 value of this macro is used instead of that alignment to align the
23098 object. */
23100 unsigned int
23101 ix86_local_alignment (tree exp, enum machine_mode mode,
23102 unsigned int align)
23104 tree type, decl;
23106 if (exp && DECL_P (exp))
23108 type = TREE_TYPE (exp);
23109 decl = exp;
23111 else
23113 type = exp;
23114 decl = NULL;
23117 /* Don't do dynamic stack realignment for long long objects with
23118 -mpreferred-stack-boundary=2. */
23119 if (!TARGET_64BIT
23120 && align == 64
23121 && ix86_preferred_stack_boundary < 64
23122 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
23123 && (!type || !TYPE_USER_ALIGN (type))
23124 && (!decl || !DECL_USER_ALIGN (decl)))
23125 align = 32;
23127 /* If TYPE is NULL, we are allocating a stack slot for caller-save
23128 register in MODE. We will return the largest alignment of XF
23129 and DF. */
23130 if (!type)
23132 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
23133 align = GET_MODE_ALIGNMENT (DFmode);
23134 return align;
23137 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
23138 to a 16-byte boundary.  The exact wording is:
23140 An array uses the same alignment as its elements, except that a local or
23141 global array variable of length at least 16 bytes or
23142 a C99 variable-length array variable always has alignment of at least 16 bytes.
23144 This was added to allow the use of aligned SSE instructions on arrays.  The
23145 rule is meant for static storage (where the compiler cannot do the analysis
23146 by itself).  We follow it for automatic variables only when convenient.
23147 We fully control everything in the function being compiled, and functions from
23148 other units cannot rely on the alignment.
23150 Exclude the va_list type.  It is the common case of a local array where
23151 we cannot benefit from the alignment. */
23152 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
23153 && TARGET_SSE)
23155 if (AGGREGATE_TYPE_P (type)
23156 && (va_list_type_node == NULL_TREE
23157 || (TYPE_MAIN_VARIANT (type)
23158 != TYPE_MAIN_VARIANT (va_list_type_node)))
23159 && TYPE_SIZE (type)
23160 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23161 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
23162 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23163 return 128;
23165 if (TREE_CODE (type) == ARRAY_TYPE)
23167 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23168 return 64;
23169 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23170 return 128;
23172 else if (TREE_CODE (type) == COMPLEX_TYPE)
23174 if (TYPE_MODE (type) == DCmode && align < 64)
23175 return 64;
23176 if ((TYPE_MODE (type) == XCmode
23177 || TYPE_MODE (type) == TCmode) && align < 128)
23178 return 128;
23180 else if ((TREE_CODE (type) == RECORD_TYPE
23181 || TREE_CODE (type) == UNION_TYPE
23182 || TREE_CODE (type) == QUAL_UNION_TYPE)
23183 && TYPE_FIELDS (type))
23185 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23186 return 64;
23187 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23188 return 128;
23190 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23191 || TREE_CODE (type) == INTEGER_TYPE)
23194 if (TYPE_MODE (type) == DFmode && align < 64)
23195 return 64;
23196 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
23197 return 128;
23199 return align;
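/* Illustrative sketch, not part of the original file: what the rule
   above means for automatic variables, assuming -m64 -msse and
   optimizing for speed.  The names are made up for the example.  */
#if 0
#include <stdarg.h>

extern void use (void *);

void
example (int n, ...)
{
  char buf[64];   /* >= 16 bytes: may be given 16-byte stack alignment so
		     the vectorizer can use aligned SSE accesses.  */
  va_list ap;     /* Explicitly excluded above: kept at its default
		     alignment, since extra alignment buys nothing here.  */

  va_start (ap, n);
  use (buf);
  use (&ap);
  va_end (ap);
}
#endif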
23202 /* Compute the minimum required alignment for dynamic stack realignment
23203 purposes for a local variable, parameter or a stack slot. EXP is
23204 the data type or decl itself, MODE is its mode and ALIGN is the
23205 alignment that the object would ordinarily have. */
23207 unsigned int
23208 ix86_minimum_alignment (tree exp, enum machine_mode mode,
23209 unsigned int align)
23211 tree type, decl;
23213 if (exp && DECL_P (exp))
23215 type = TREE_TYPE (exp);
23216 decl = exp;
23218 else
23220 type = exp;
23221 decl = NULL;
23224 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
23225 return align;
23227 /* Don't do dynamic stack realignment for long long objects with
23228 -mpreferred-stack-boundary=2. */
23229 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
23230 && (!type || !TYPE_USER_ALIGN (type))
23231 && (!decl || !DECL_USER_ALIGN (decl)))
23232 return 32;
23234 return align;
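/* Illustrative sketch, not part of the original file: the effect of the
   hook above when compiling with -m32 -mpreferred-stack-boundary=2.
   The names are made up for the example.  */
#if 0
extern void use (void *);

void
example (void)
{
  /* A plain local "long long" would normally want 8-byte alignment,
     which a 4-byte-aligned stack cannot provide without dynamic
     realignment; the hook reduces its required alignment to 32 bits
     so no realignment is forced.  */
  long long plain;

  /* A user-specified alignment is still honoured.  */
  long long forced __attribute__ ((aligned (8)));

  use (&plain);
  use (&forced);
}
#endif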
23237 /* Find a location for the static chain incoming to a nested function.
23238 This is a register, unless all free registers are used by arguments. */
23240 static rtx
23241 ix86_static_chain (const_tree fndecl, bool incoming_p)
23243 unsigned regno;
23245 if (!DECL_STATIC_CHAIN (fndecl))
23246 return NULL;
23248 if (TARGET_64BIT)
23250 /* We always use R10 in 64-bit mode. */
23251 regno = R10_REG;
23253 else
23255 tree fntype;
23256 unsigned int ccvt;
23258 /* By default in 32-bit mode we use ECX to pass the static chain. */
23259 regno = CX_REG;
23261 fntype = TREE_TYPE (fndecl);
23262 ccvt = ix86_get_callcvt (fntype);
23263 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
23265 /* Fastcall functions use ecx/edx for arguments, which leaves
23266 us with EAX for the static chain.
23267 Thiscall functions use ecx for arguments, which also
23268 leaves us with EAX for the static chain. */
23269 regno = AX_REG;
23271 else if (ix86_function_regparm (fntype, fndecl) == 3)
23273 /* For regparm 3, we have no free call-clobbered registers in
23274 which to store the static chain. In order to implement this,
23275 we have the trampoline push the static chain to the stack.
23276 However, we can't push a value below the return address when
23277 we call the nested function directly, so we have to use an
23278 alternate entry point. For this we use ESI, and have the
23279 alternate entry point push ESI, so that things appear the
23280 same once we're executing the nested function. */
23281 if (incoming_p)
23283 if (fndecl == current_function_decl)
23284 ix86_static_chain_on_stack = true;
23285 return gen_frame_mem (SImode,
23286 plus_constant (arg_pointer_rtx, -8));
23288 regno = SI_REG;
23292 return gen_rtx_REG (Pmode, regno);
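/* Illustrative summary, not part of the original file, of the register
   choices made above:

     64-bit                        -> R10
     32-bit, default conventions   -> ECX
     32-bit, fastcall / thiscall   -> EAX (ECX and EDX carry arguments)
     32-bit, regparm (3)           -> no free register; the trampoline
				      pushes the chain and the alternate
				      entry point reloads it via ESI.

   The sketch below shows the kind of GNU C nested function whose
   trampoline needs a static chain in the first place; the names are
   made up for the example.  */
#if 0
static int
outer (int x)
{
  int inner (int y) { return x + y; }	/* reads OUTER's frame through
					   the static chain register */
  return inner (1);
}
#endif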
23295 /* Emit RTL insns to initialize the variable parts of a trampoline.
23296 FNDECL is the decl of the target address; M_TRAMP is a MEM for
23297 the trampoline, and CHAIN_VALUE is an RTX for the static chain
23298 to be passed to the target function. */
23300 static void
23301 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
23303 rtx mem, fnaddr;
23305 fnaddr = XEXP (DECL_RTL (fndecl), 0);
23307 if (!TARGET_64BIT)
23309 rtx disp, chain;
23310 int opcode;
23312 /* Depending on the static chain location, either load a register
23313 with a constant, or push the constant to the stack. All of the
23314 instructions are the same size. */
23315 chain = ix86_static_chain (fndecl, true);
23316 if (REG_P (chain))
23318 if (REGNO (chain) == CX_REG)
23319 opcode = 0xb9;
23320 else if (REGNO (chain) == AX_REG)
23321 opcode = 0xb8;
23322 else
23323 gcc_unreachable ();
23325 else
23326 opcode = 0x68;
23328 mem = adjust_address (m_tramp, QImode, 0);
23329 emit_move_insn (mem, gen_int_mode (opcode, QImode));
23331 mem = adjust_address (m_tramp, SImode, 1);
23332 emit_move_insn (mem, chain_value);
23334 /* Compute offset from the end of the jmp to the target function.
23335 In the case in which the trampoline stores the static chain on
23336 the stack, we need to skip the first insn which pushes the
23337 (call-saved) register static chain; this push is 1 byte. */
23338 disp = expand_binop (SImode, sub_optab, fnaddr,
23339 plus_constant (XEXP (m_tramp, 0),
23340 MEM_P (chain) ? 9 : 10),
23341 NULL_RTX, 1, OPTAB_DIRECT);
23343 mem = adjust_address (m_tramp, QImode, 5);
23344 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
23346 mem = adjust_address (m_tramp, SImode, 6);
23347 emit_move_insn (mem, disp);
23349 else
23351 int offset = 0;
23353 /* Load the function address into r11.  Try to load the address using
23354    the shorter movl instead of movabs.  We may want to support
23355    movq for kernel mode, but the kernel does not use trampolines at
23356    the moment.  */
23357 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
23359 fnaddr = copy_to_mode_reg (DImode, fnaddr);
23361 mem = adjust_address (m_tramp, HImode, offset);
23362 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
23364 mem = adjust_address (m_tramp, SImode, offset + 2);
23365 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
23366 offset += 6;
23368 else
23370 mem = adjust_address (m_tramp, HImode, offset);
23371 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
23373 mem = adjust_address (m_tramp, DImode, offset + 2);
23374 emit_move_insn (mem, fnaddr);
23375 offset += 10;
23378 /* Load static chain using movabs to r10. */
23379 mem = adjust_address (m_tramp, HImode, offset);
23380 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
23382 mem = adjust_address (m_tramp, DImode, offset + 2);
23383 emit_move_insn (mem, chain_value);
23384 offset += 10;
23386 /* Jump to r11; the last (unused) byte is a nop, only there to
23387 pad the write out to a single 32-bit store. */
23388 mem = adjust_address (m_tramp, SImode, offset);
23389 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
23390 offset += 4;
23392 gcc_assert (offset <= TRAMPOLINE_SIZE);
23395 #ifdef ENABLE_EXECUTE_STACK
23396 #ifdef CHECK_EXECUTE_STACK_ENABLED
23397 if (CHECK_EXECUTE_STACK_ENABLED)
23398 #endif
23399 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
23400 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
23401 #endif
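/* Illustrative byte layout, not part of the original file, of the
   trampolines emitted above.  <chain>, <target> and <disp> stand for
   the immediates stored by the emit_move_insn calls.

   32-bit, static chain in a register (ECX shown; EAX uses opcode 0xb8,
   and the on-stack variant uses a pushl, opcode 0x68):

     offset 0:  b9 <chain:4>	   movl   $<chain>, %ecx
     offset 5:  e9 <disp:4>	   jmp    <target>   (disp is relative to
						      the end of the jmp)

   64-bit, movabs form (the movl form instead starts with 41 bb <imm32>
   when the target address fits in 32 bits):

     offset  0:  49 bb <target:8>   movabs $<target>, %r11
     offset 10:  49 ba <chain:8>    movabs $<chain>,  %r10
     offset 20:  49 ff e3	    jmp    *%r11
     offset 23:  90		    nop    (pads the store to 32 bits)  */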
23404 /* The following file contains several enumerations and data structures
23405 built from the definitions in i386-builtin-types.def. */
23407 #include "i386-builtin-types.inc"
23409 /* Table for the ix86 builtin non-function types. */
23410 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
23412 /* Retrieve an element from the above table, building some of
23413 the types lazily. */
23415 static tree
23416 ix86_get_builtin_type (enum ix86_builtin_type tcode)
23418 unsigned int index;
23419 tree type, itype;
23421 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
23423 type = ix86_builtin_type_tab[(int) tcode];
23424 if (type != NULL)
23425 return type;
23427 gcc_assert (tcode > IX86_BT_LAST_PRIM);
23428 if (tcode <= IX86_BT_LAST_VECT)
23430 enum machine_mode mode;
23432 index = tcode - IX86_BT_LAST_PRIM - 1;
23433 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
23434 mode = ix86_builtin_type_vect_mode[index];
23436 type = build_vector_type_for_mode (itype, mode);
23438 else
23440 int quals;
23442 index = tcode - IX86_BT_LAST_VECT - 1;
23443 if (tcode <= IX86_BT_LAST_PTR)
23444 quals = TYPE_UNQUALIFIED;
23445 else
23446 quals = TYPE_QUAL_CONST;
23448 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
23449 if (quals != TYPE_UNQUALIFIED)
23450 itype = build_qualified_type (itype, quals);
23452 type = build_pointer_type (itype);
23455 ix86_builtin_type_tab[(int) tcode] = type;
23456 return type;
23459 /* Table for the ix86 builtin function types. */
23460 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23462 /* Retrieve an element from the above table, building some of
23463 the types lazily. */
23465 static tree
23466 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
23468 tree type;
23470 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23472 type = ix86_builtin_func_type_tab[(int) tcode];
23473 if (type != NULL)
23474 return type;
23476 if (tcode <= IX86_BT_LAST_FUNC)
23478 unsigned start = ix86_builtin_func_start[(int) tcode];
23479 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23480 tree rtype, atype, args = void_list_node;
23481 unsigned i;
23483 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23484 for (i = after - 1; i > start; --i)
23486 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23487 args = tree_cons (NULL, atype, args);
23490 type = build_function_type (rtype, args);
23492 else
23494 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23495 enum ix86_builtin_func_type icode;
23497 icode = ix86_builtin_func_alias_base[index];
23498 type = ix86_get_builtin_func_type (icode);
23501 ix86_builtin_func_type_tab[(int) tcode] = type;
23502 return type;
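/* Illustrative decoding, not part of the original file, of the packed
   tables used above.  A hypothetical entry

     ix86_builtin_func_start[T]     = 10
     ix86_builtin_func_start[T + 1] = 13
     ix86_builtin_func_args[10..12] = { V4SF, V4SF, INT }

   describes a function type whose return type is the first slot (V4SF)
   and whose parameters are the remaining slots in order (V4SF, INT),
   i.e. roughly "v4sf f (v4sf, int)".  Walking the slots from the back
   and consing onto void_list_node, as the loop above does, produces the
   parameter list in source order.  */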
23506 /* Codes for all the SSE/MMX builtins. */
23507 enum ix86_builtins
23509 IX86_BUILTIN_ADDPS,
23510 IX86_BUILTIN_ADDSS,
23511 IX86_BUILTIN_DIVPS,
23512 IX86_BUILTIN_DIVSS,
23513 IX86_BUILTIN_MULPS,
23514 IX86_BUILTIN_MULSS,
23515 IX86_BUILTIN_SUBPS,
23516 IX86_BUILTIN_SUBSS,
23518 IX86_BUILTIN_CMPEQPS,
23519 IX86_BUILTIN_CMPLTPS,
23520 IX86_BUILTIN_CMPLEPS,
23521 IX86_BUILTIN_CMPGTPS,
23522 IX86_BUILTIN_CMPGEPS,
23523 IX86_BUILTIN_CMPNEQPS,
23524 IX86_BUILTIN_CMPNLTPS,
23525 IX86_BUILTIN_CMPNLEPS,
23526 IX86_BUILTIN_CMPNGTPS,
23527 IX86_BUILTIN_CMPNGEPS,
23528 IX86_BUILTIN_CMPORDPS,
23529 IX86_BUILTIN_CMPUNORDPS,
23530 IX86_BUILTIN_CMPEQSS,
23531 IX86_BUILTIN_CMPLTSS,
23532 IX86_BUILTIN_CMPLESS,
23533 IX86_BUILTIN_CMPNEQSS,
23534 IX86_BUILTIN_CMPNLTSS,
23535 IX86_BUILTIN_CMPNLESS,
23536 IX86_BUILTIN_CMPNGTSS,
23537 IX86_BUILTIN_CMPNGESS,
23538 IX86_BUILTIN_CMPORDSS,
23539 IX86_BUILTIN_CMPUNORDSS,
23541 IX86_BUILTIN_COMIEQSS,
23542 IX86_BUILTIN_COMILTSS,
23543 IX86_BUILTIN_COMILESS,
23544 IX86_BUILTIN_COMIGTSS,
23545 IX86_BUILTIN_COMIGESS,
23546 IX86_BUILTIN_COMINEQSS,
23547 IX86_BUILTIN_UCOMIEQSS,
23548 IX86_BUILTIN_UCOMILTSS,
23549 IX86_BUILTIN_UCOMILESS,
23550 IX86_BUILTIN_UCOMIGTSS,
23551 IX86_BUILTIN_UCOMIGESS,
23552 IX86_BUILTIN_UCOMINEQSS,
23554 IX86_BUILTIN_CVTPI2PS,
23555 IX86_BUILTIN_CVTPS2PI,
23556 IX86_BUILTIN_CVTSI2SS,
23557 IX86_BUILTIN_CVTSI642SS,
23558 IX86_BUILTIN_CVTSS2SI,
23559 IX86_BUILTIN_CVTSS2SI64,
23560 IX86_BUILTIN_CVTTPS2PI,
23561 IX86_BUILTIN_CVTTSS2SI,
23562 IX86_BUILTIN_CVTTSS2SI64,
23564 IX86_BUILTIN_MAXPS,
23565 IX86_BUILTIN_MAXSS,
23566 IX86_BUILTIN_MINPS,
23567 IX86_BUILTIN_MINSS,
23569 IX86_BUILTIN_LOADUPS,
23570 IX86_BUILTIN_STOREUPS,
23571 IX86_BUILTIN_MOVSS,
23573 IX86_BUILTIN_MOVHLPS,
23574 IX86_BUILTIN_MOVLHPS,
23575 IX86_BUILTIN_LOADHPS,
23576 IX86_BUILTIN_LOADLPS,
23577 IX86_BUILTIN_STOREHPS,
23578 IX86_BUILTIN_STORELPS,
23580 IX86_BUILTIN_MASKMOVQ,
23581 IX86_BUILTIN_MOVMSKPS,
23582 IX86_BUILTIN_PMOVMSKB,
23584 IX86_BUILTIN_MOVNTPS,
23585 IX86_BUILTIN_MOVNTQ,
23587 IX86_BUILTIN_LOADDQU,
23588 IX86_BUILTIN_STOREDQU,
23590 IX86_BUILTIN_PACKSSWB,
23591 IX86_BUILTIN_PACKSSDW,
23592 IX86_BUILTIN_PACKUSWB,
23594 IX86_BUILTIN_PADDB,
23595 IX86_BUILTIN_PADDW,
23596 IX86_BUILTIN_PADDD,
23597 IX86_BUILTIN_PADDQ,
23598 IX86_BUILTIN_PADDSB,
23599 IX86_BUILTIN_PADDSW,
23600 IX86_BUILTIN_PADDUSB,
23601 IX86_BUILTIN_PADDUSW,
23602 IX86_BUILTIN_PSUBB,
23603 IX86_BUILTIN_PSUBW,
23604 IX86_BUILTIN_PSUBD,
23605 IX86_BUILTIN_PSUBQ,
23606 IX86_BUILTIN_PSUBSB,
23607 IX86_BUILTIN_PSUBSW,
23608 IX86_BUILTIN_PSUBUSB,
23609 IX86_BUILTIN_PSUBUSW,
23611 IX86_BUILTIN_PAND,
23612 IX86_BUILTIN_PANDN,
23613 IX86_BUILTIN_POR,
23614 IX86_BUILTIN_PXOR,
23616 IX86_BUILTIN_PAVGB,
23617 IX86_BUILTIN_PAVGW,
23619 IX86_BUILTIN_PCMPEQB,
23620 IX86_BUILTIN_PCMPEQW,
23621 IX86_BUILTIN_PCMPEQD,
23622 IX86_BUILTIN_PCMPGTB,
23623 IX86_BUILTIN_PCMPGTW,
23624 IX86_BUILTIN_PCMPGTD,
23626 IX86_BUILTIN_PMADDWD,
23628 IX86_BUILTIN_PMAXSW,
23629 IX86_BUILTIN_PMAXUB,
23630 IX86_BUILTIN_PMINSW,
23631 IX86_BUILTIN_PMINUB,
23633 IX86_BUILTIN_PMULHUW,
23634 IX86_BUILTIN_PMULHW,
23635 IX86_BUILTIN_PMULLW,
23637 IX86_BUILTIN_PSADBW,
23638 IX86_BUILTIN_PSHUFW,
23640 IX86_BUILTIN_PSLLW,
23641 IX86_BUILTIN_PSLLD,
23642 IX86_BUILTIN_PSLLQ,
23643 IX86_BUILTIN_PSRAW,
23644 IX86_BUILTIN_PSRAD,
23645 IX86_BUILTIN_PSRLW,
23646 IX86_BUILTIN_PSRLD,
23647 IX86_BUILTIN_PSRLQ,
23648 IX86_BUILTIN_PSLLWI,
23649 IX86_BUILTIN_PSLLDI,
23650 IX86_BUILTIN_PSLLQI,
23651 IX86_BUILTIN_PSRAWI,
23652 IX86_BUILTIN_PSRADI,
23653 IX86_BUILTIN_PSRLWI,
23654 IX86_BUILTIN_PSRLDI,
23655 IX86_BUILTIN_PSRLQI,
23657 IX86_BUILTIN_PUNPCKHBW,
23658 IX86_BUILTIN_PUNPCKHWD,
23659 IX86_BUILTIN_PUNPCKHDQ,
23660 IX86_BUILTIN_PUNPCKLBW,
23661 IX86_BUILTIN_PUNPCKLWD,
23662 IX86_BUILTIN_PUNPCKLDQ,
23664 IX86_BUILTIN_SHUFPS,
23666 IX86_BUILTIN_RCPPS,
23667 IX86_BUILTIN_RCPSS,
23668 IX86_BUILTIN_RSQRTPS,
23669 IX86_BUILTIN_RSQRTPS_NR,
23670 IX86_BUILTIN_RSQRTSS,
23671 IX86_BUILTIN_RSQRTF,
23672 IX86_BUILTIN_SQRTPS,
23673 IX86_BUILTIN_SQRTPS_NR,
23674 IX86_BUILTIN_SQRTSS,
23676 IX86_BUILTIN_UNPCKHPS,
23677 IX86_BUILTIN_UNPCKLPS,
23679 IX86_BUILTIN_ANDPS,
23680 IX86_BUILTIN_ANDNPS,
23681 IX86_BUILTIN_ORPS,
23682 IX86_BUILTIN_XORPS,
23684 IX86_BUILTIN_EMMS,
23685 IX86_BUILTIN_LDMXCSR,
23686 IX86_BUILTIN_STMXCSR,
23687 IX86_BUILTIN_SFENCE,
23689 /* 3DNow! Original */
23690 IX86_BUILTIN_FEMMS,
23691 IX86_BUILTIN_PAVGUSB,
23692 IX86_BUILTIN_PF2ID,
23693 IX86_BUILTIN_PFACC,
23694 IX86_BUILTIN_PFADD,
23695 IX86_BUILTIN_PFCMPEQ,
23696 IX86_BUILTIN_PFCMPGE,
23697 IX86_BUILTIN_PFCMPGT,
23698 IX86_BUILTIN_PFMAX,
23699 IX86_BUILTIN_PFMIN,
23700 IX86_BUILTIN_PFMUL,
23701 IX86_BUILTIN_PFRCP,
23702 IX86_BUILTIN_PFRCPIT1,
23703 IX86_BUILTIN_PFRCPIT2,
23704 IX86_BUILTIN_PFRSQIT1,
23705 IX86_BUILTIN_PFRSQRT,
23706 IX86_BUILTIN_PFSUB,
23707 IX86_BUILTIN_PFSUBR,
23708 IX86_BUILTIN_PI2FD,
23709 IX86_BUILTIN_PMULHRW,
23711 /* 3DNow! Athlon Extensions */
23712 IX86_BUILTIN_PF2IW,
23713 IX86_BUILTIN_PFNACC,
23714 IX86_BUILTIN_PFPNACC,
23715 IX86_BUILTIN_PI2FW,
23716 IX86_BUILTIN_PSWAPDSI,
23717 IX86_BUILTIN_PSWAPDSF,
23719 /* SSE2 */
23720 IX86_BUILTIN_ADDPD,
23721 IX86_BUILTIN_ADDSD,
23722 IX86_BUILTIN_DIVPD,
23723 IX86_BUILTIN_DIVSD,
23724 IX86_BUILTIN_MULPD,
23725 IX86_BUILTIN_MULSD,
23726 IX86_BUILTIN_SUBPD,
23727 IX86_BUILTIN_SUBSD,
23729 IX86_BUILTIN_CMPEQPD,
23730 IX86_BUILTIN_CMPLTPD,
23731 IX86_BUILTIN_CMPLEPD,
23732 IX86_BUILTIN_CMPGTPD,
23733 IX86_BUILTIN_CMPGEPD,
23734 IX86_BUILTIN_CMPNEQPD,
23735 IX86_BUILTIN_CMPNLTPD,
23736 IX86_BUILTIN_CMPNLEPD,
23737 IX86_BUILTIN_CMPNGTPD,
23738 IX86_BUILTIN_CMPNGEPD,
23739 IX86_BUILTIN_CMPORDPD,
23740 IX86_BUILTIN_CMPUNORDPD,
23741 IX86_BUILTIN_CMPEQSD,
23742 IX86_BUILTIN_CMPLTSD,
23743 IX86_BUILTIN_CMPLESD,
23744 IX86_BUILTIN_CMPNEQSD,
23745 IX86_BUILTIN_CMPNLTSD,
23746 IX86_BUILTIN_CMPNLESD,
23747 IX86_BUILTIN_CMPORDSD,
23748 IX86_BUILTIN_CMPUNORDSD,
23750 IX86_BUILTIN_COMIEQSD,
23751 IX86_BUILTIN_COMILTSD,
23752 IX86_BUILTIN_COMILESD,
23753 IX86_BUILTIN_COMIGTSD,
23754 IX86_BUILTIN_COMIGESD,
23755 IX86_BUILTIN_COMINEQSD,
23756 IX86_BUILTIN_UCOMIEQSD,
23757 IX86_BUILTIN_UCOMILTSD,
23758 IX86_BUILTIN_UCOMILESD,
23759 IX86_BUILTIN_UCOMIGTSD,
23760 IX86_BUILTIN_UCOMIGESD,
23761 IX86_BUILTIN_UCOMINEQSD,
23763 IX86_BUILTIN_MAXPD,
23764 IX86_BUILTIN_MAXSD,
23765 IX86_BUILTIN_MINPD,
23766 IX86_BUILTIN_MINSD,
23768 IX86_BUILTIN_ANDPD,
23769 IX86_BUILTIN_ANDNPD,
23770 IX86_BUILTIN_ORPD,
23771 IX86_BUILTIN_XORPD,
23773 IX86_BUILTIN_SQRTPD,
23774 IX86_BUILTIN_SQRTSD,
23776 IX86_BUILTIN_UNPCKHPD,
23777 IX86_BUILTIN_UNPCKLPD,
23779 IX86_BUILTIN_SHUFPD,
23781 IX86_BUILTIN_LOADUPD,
23782 IX86_BUILTIN_STOREUPD,
23783 IX86_BUILTIN_MOVSD,
23785 IX86_BUILTIN_LOADHPD,
23786 IX86_BUILTIN_LOADLPD,
23788 IX86_BUILTIN_CVTDQ2PD,
23789 IX86_BUILTIN_CVTDQ2PS,
23791 IX86_BUILTIN_CVTPD2DQ,
23792 IX86_BUILTIN_CVTPD2PI,
23793 IX86_BUILTIN_CVTPD2PS,
23794 IX86_BUILTIN_CVTTPD2DQ,
23795 IX86_BUILTIN_CVTTPD2PI,
23797 IX86_BUILTIN_CVTPI2PD,
23798 IX86_BUILTIN_CVTSI2SD,
23799 IX86_BUILTIN_CVTSI642SD,
23801 IX86_BUILTIN_CVTSD2SI,
23802 IX86_BUILTIN_CVTSD2SI64,
23803 IX86_BUILTIN_CVTSD2SS,
23804 IX86_BUILTIN_CVTSS2SD,
23805 IX86_BUILTIN_CVTTSD2SI,
23806 IX86_BUILTIN_CVTTSD2SI64,
23808 IX86_BUILTIN_CVTPS2DQ,
23809 IX86_BUILTIN_CVTPS2PD,
23810 IX86_BUILTIN_CVTTPS2DQ,
23812 IX86_BUILTIN_MOVNTI,
23813 IX86_BUILTIN_MOVNTPD,
23814 IX86_BUILTIN_MOVNTDQ,
23816 IX86_BUILTIN_MOVQ128,
23818 /* SSE2 MMX */
23819 IX86_BUILTIN_MASKMOVDQU,
23820 IX86_BUILTIN_MOVMSKPD,
23821 IX86_BUILTIN_PMOVMSKB128,
23823 IX86_BUILTIN_PACKSSWB128,
23824 IX86_BUILTIN_PACKSSDW128,
23825 IX86_BUILTIN_PACKUSWB128,
23827 IX86_BUILTIN_PADDB128,
23828 IX86_BUILTIN_PADDW128,
23829 IX86_BUILTIN_PADDD128,
23830 IX86_BUILTIN_PADDQ128,
23831 IX86_BUILTIN_PADDSB128,
23832 IX86_BUILTIN_PADDSW128,
23833 IX86_BUILTIN_PADDUSB128,
23834 IX86_BUILTIN_PADDUSW128,
23835 IX86_BUILTIN_PSUBB128,
23836 IX86_BUILTIN_PSUBW128,
23837 IX86_BUILTIN_PSUBD128,
23838 IX86_BUILTIN_PSUBQ128,
23839 IX86_BUILTIN_PSUBSB128,
23840 IX86_BUILTIN_PSUBSW128,
23841 IX86_BUILTIN_PSUBUSB128,
23842 IX86_BUILTIN_PSUBUSW128,
23844 IX86_BUILTIN_PAND128,
23845 IX86_BUILTIN_PANDN128,
23846 IX86_BUILTIN_POR128,
23847 IX86_BUILTIN_PXOR128,
23849 IX86_BUILTIN_PAVGB128,
23850 IX86_BUILTIN_PAVGW128,
23852 IX86_BUILTIN_PCMPEQB128,
23853 IX86_BUILTIN_PCMPEQW128,
23854 IX86_BUILTIN_PCMPEQD128,
23855 IX86_BUILTIN_PCMPGTB128,
23856 IX86_BUILTIN_PCMPGTW128,
23857 IX86_BUILTIN_PCMPGTD128,
23859 IX86_BUILTIN_PMADDWD128,
23861 IX86_BUILTIN_PMAXSW128,
23862 IX86_BUILTIN_PMAXUB128,
23863 IX86_BUILTIN_PMINSW128,
23864 IX86_BUILTIN_PMINUB128,
23866 IX86_BUILTIN_PMULUDQ,
23867 IX86_BUILTIN_PMULUDQ128,
23868 IX86_BUILTIN_PMULHUW128,
23869 IX86_BUILTIN_PMULHW128,
23870 IX86_BUILTIN_PMULLW128,
23872 IX86_BUILTIN_PSADBW128,
23873 IX86_BUILTIN_PSHUFHW,
23874 IX86_BUILTIN_PSHUFLW,
23875 IX86_BUILTIN_PSHUFD,
23877 IX86_BUILTIN_PSLLDQI128,
23878 IX86_BUILTIN_PSLLWI128,
23879 IX86_BUILTIN_PSLLDI128,
23880 IX86_BUILTIN_PSLLQI128,
23881 IX86_BUILTIN_PSRAWI128,
23882 IX86_BUILTIN_PSRADI128,
23883 IX86_BUILTIN_PSRLDQI128,
23884 IX86_BUILTIN_PSRLWI128,
23885 IX86_BUILTIN_PSRLDI128,
23886 IX86_BUILTIN_PSRLQI128,
23888 IX86_BUILTIN_PSLLDQ128,
23889 IX86_BUILTIN_PSLLW128,
23890 IX86_BUILTIN_PSLLD128,
23891 IX86_BUILTIN_PSLLQ128,
23892 IX86_BUILTIN_PSRAW128,
23893 IX86_BUILTIN_PSRAD128,
23894 IX86_BUILTIN_PSRLW128,
23895 IX86_BUILTIN_PSRLD128,
23896 IX86_BUILTIN_PSRLQ128,
23898 IX86_BUILTIN_PUNPCKHBW128,
23899 IX86_BUILTIN_PUNPCKHWD128,
23900 IX86_BUILTIN_PUNPCKHDQ128,
23901 IX86_BUILTIN_PUNPCKHQDQ128,
23902 IX86_BUILTIN_PUNPCKLBW128,
23903 IX86_BUILTIN_PUNPCKLWD128,
23904 IX86_BUILTIN_PUNPCKLDQ128,
23905 IX86_BUILTIN_PUNPCKLQDQ128,
23907 IX86_BUILTIN_CLFLUSH,
23908 IX86_BUILTIN_MFENCE,
23909 IX86_BUILTIN_LFENCE,
23910 IX86_BUILTIN_PAUSE,
23912 IX86_BUILTIN_BSRSI,
23913 IX86_BUILTIN_BSRDI,
23914 IX86_BUILTIN_RDPMC,
23915 IX86_BUILTIN_RDTSC,
23916 IX86_BUILTIN_RDTSCP,
23917 IX86_BUILTIN_ROLQI,
23918 IX86_BUILTIN_ROLHI,
23919 IX86_BUILTIN_RORQI,
23920 IX86_BUILTIN_RORHI,
23922 /* SSE3. */
23923 IX86_BUILTIN_ADDSUBPS,
23924 IX86_BUILTIN_HADDPS,
23925 IX86_BUILTIN_HSUBPS,
23926 IX86_BUILTIN_MOVSHDUP,
23927 IX86_BUILTIN_MOVSLDUP,
23928 IX86_BUILTIN_ADDSUBPD,
23929 IX86_BUILTIN_HADDPD,
23930 IX86_BUILTIN_HSUBPD,
23931 IX86_BUILTIN_LDDQU,
23933 IX86_BUILTIN_MONITOR,
23934 IX86_BUILTIN_MWAIT,
23936 /* SSSE3. */
23937 IX86_BUILTIN_PHADDW,
23938 IX86_BUILTIN_PHADDD,
23939 IX86_BUILTIN_PHADDSW,
23940 IX86_BUILTIN_PHSUBW,
23941 IX86_BUILTIN_PHSUBD,
23942 IX86_BUILTIN_PHSUBSW,
23943 IX86_BUILTIN_PMADDUBSW,
23944 IX86_BUILTIN_PMULHRSW,
23945 IX86_BUILTIN_PSHUFB,
23946 IX86_BUILTIN_PSIGNB,
23947 IX86_BUILTIN_PSIGNW,
23948 IX86_BUILTIN_PSIGND,
23949 IX86_BUILTIN_PALIGNR,
23950 IX86_BUILTIN_PABSB,
23951 IX86_BUILTIN_PABSW,
23952 IX86_BUILTIN_PABSD,
23954 IX86_BUILTIN_PHADDW128,
23955 IX86_BUILTIN_PHADDD128,
23956 IX86_BUILTIN_PHADDSW128,
23957 IX86_BUILTIN_PHSUBW128,
23958 IX86_BUILTIN_PHSUBD128,
23959 IX86_BUILTIN_PHSUBSW128,
23960 IX86_BUILTIN_PMADDUBSW128,
23961 IX86_BUILTIN_PMULHRSW128,
23962 IX86_BUILTIN_PSHUFB128,
23963 IX86_BUILTIN_PSIGNB128,
23964 IX86_BUILTIN_PSIGNW128,
23965 IX86_BUILTIN_PSIGND128,
23966 IX86_BUILTIN_PALIGNR128,
23967 IX86_BUILTIN_PABSB128,
23968 IX86_BUILTIN_PABSW128,
23969 IX86_BUILTIN_PABSD128,
23971 /* AMDFAM10 - SSE4A New Instructions. */
23972 IX86_BUILTIN_MOVNTSD,
23973 IX86_BUILTIN_MOVNTSS,
23974 IX86_BUILTIN_EXTRQI,
23975 IX86_BUILTIN_EXTRQ,
23976 IX86_BUILTIN_INSERTQI,
23977 IX86_BUILTIN_INSERTQ,
23979 /* SSE4.1. */
23980 IX86_BUILTIN_BLENDPD,
23981 IX86_BUILTIN_BLENDPS,
23982 IX86_BUILTIN_BLENDVPD,
23983 IX86_BUILTIN_BLENDVPS,
23984 IX86_BUILTIN_PBLENDVB128,
23985 IX86_BUILTIN_PBLENDW128,
23987 IX86_BUILTIN_DPPD,
23988 IX86_BUILTIN_DPPS,
23990 IX86_BUILTIN_INSERTPS128,
23992 IX86_BUILTIN_MOVNTDQA,
23993 IX86_BUILTIN_MPSADBW128,
23994 IX86_BUILTIN_PACKUSDW128,
23995 IX86_BUILTIN_PCMPEQQ,
23996 IX86_BUILTIN_PHMINPOSUW128,
23998 IX86_BUILTIN_PMAXSB128,
23999 IX86_BUILTIN_PMAXSD128,
24000 IX86_BUILTIN_PMAXUD128,
24001 IX86_BUILTIN_PMAXUW128,
24003 IX86_BUILTIN_PMINSB128,
24004 IX86_BUILTIN_PMINSD128,
24005 IX86_BUILTIN_PMINUD128,
24006 IX86_BUILTIN_PMINUW128,
24008 IX86_BUILTIN_PMOVSXBW128,
24009 IX86_BUILTIN_PMOVSXBD128,
24010 IX86_BUILTIN_PMOVSXBQ128,
24011 IX86_BUILTIN_PMOVSXWD128,
24012 IX86_BUILTIN_PMOVSXWQ128,
24013 IX86_BUILTIN_PMOVSXDQ128,
24015 IX86_BUILTIN_PMOVZXBW128,
24016 IX86_BUILTIN_PMOVZXBD128,
24017 IX86_BUILTIN_PMOVZXBQ128,
24018 IX86_BUILTIN_PMOVZXWD128,
24019 IX86_BUILTIN_PMOVZXWQ128,
24020 IX86_BUILTIN_PMOVZXDQ128,
24022 IX86_BUILTIN_PMULDQ128,
24023 IX86_BUILTIN_PMULLD128,
24025 IX86_BUILTIN_ROUNDPD,
24026 IX86_BUILTIN_ROUNDPS,
24027 IX86_BUILTIN_ROUNDSD,
24028 IX86_BUILTIN_ROUNDSS,
24030 IX86_BUILTIN_FLOORPD,
24031 IX86_BUILTIN_CEILPD,
24032 IX86_BUILTIN_TRUNCPD,
24033 IX86_BUILTIN_RINTPD,
24034 IX86_BUILTIN_FLOORPS,
24035 IX86_BUILTIN_CEILPS,
24036 IX86_BUILTIN_TRUNCPS,
24037 IX86_BUILTIN_RINTPS,
24039 IX86_BUILTIN_PTESTZ,
24040 IX86_BUILTIN_PTESTC,
24041 IX86_BUILTIN_PTESTNZC,
24043 IX86_BUILTIN_VEC_INIT_V2SI,
24044 IX86_BUILTIN_VEC_INIT_V4HI,
24045 IX86_BUILTIN_VEC_INIT_V8QI,
24046 IX86_BUILTIN_VEC_EXT_V2DF,
24047 IX86_BUILTIN_VEC_EXT_V2DI,
24048 IX86_BUILTIN_VEC_EXT_V4SF,
24049 IX86_BUILTIN_VEC_EXT_V4SI,
24050 IX86_BUILTIN_VEC_EXT_V8HI,
24051 IX86_BUILTIN_VEC_EXT_V2SI,
24052 IX86_BUILTIN_VEC_EXT_V4HI,
24053 IX86_BUILTIN_VEC_EXT_V16QI,
24054 IX86_BUILTIN_VEC_SET_V2DI,
24055 IX86_BUILTIN_VEC_SET_V4SF,
24056 IX86_BUILTIN_VEC_SET_V4SI,
24057 IX86_BUILTIN_VEC_SET_V8HI,
24058 IX86_BUILTIN_VEC_SET_V4HI,
24059 IX86_BUILTIN_VEC_SET_V16QI,
24061 IX86_BUILTIN_VEC_PACK_SFIX,
24063 /* SSE4.2. */
24064 IX86_BUILTIN_CRC32QI,
24065 IX86_BUILTIN_CRC32HI,
24066 IX86_BUILTIN_CRC32SI,
24067 IX86_BUILTIN_CRC32DI,
24069 IX86_BUILTIN_PCMPESTRI128,
24070 IX86_BUILTIN_PCMPESTRM128,
24071 IX86_BUILTIN_PCMPESTRA128,
24072 IX86_BUILTIN_PCMPESTRC128,
24073 IX86_BUILTIN_PCMPESTRO128,
24074 IX86_BUILTIN_PCMPESTRS128,
24075 IX86_BUILTIN_PCMPESTRZ128,
24076 IX86_BUILTIN_PCMPISTRI128,
24077 IX86_BUILTIN_PCMPISTRM128,
24078 IX86_BUILTIN_PCMPISTRA128,
24079 IX86_BUILTIN_PCMPISTRC128,
24080 IX86_BUILTIN_PCMPISTRO128,
24081 IX86_BUILTIN_PCMPISTRS128,
24082 IX86_BUILTIN_PCMPISTRZ128,
24084 IX86_BUILTIN_PCMPGTQ,
24086 /* AES instructions */
24087 IX86_BUILTIN_AESENC128,
24088 IX86_BUILTIN_AESENCLAST128,
24089 IX86_BUILTIN_AESDEC128,
24090 IX86_BUILTIN_AESDECLAST128,
24091 IX86_BUILTIN_AESIMC128,
24092 IX86_BUILTIN_AESKEYGENASSIST128,
24094 /* PCLMUL instruction */
24095 IX86_BUILTIN_PCLMULQDQ128,
24097 /* AVX */
24098 IX86_BUILTIN_ADDPD256,
24099 IX86_BUILTIN_ADDPS256,
24100 IX86_BUILTIN_ADDSUBPD256,
24101 IX86_BUILTIN_ADDSUBPS256,
24102 IX86_BUILTIN_ANDPD256,
24103 IX86_BUILTIN_ANDPS256,
24104 IX86_BUILTIN_ANDNPD256,
24105 IX86_BUILTIN_ANDNPS256,
24106 IX86_BUILTIN_BLENDPD256,
24107 IX86_BUILTIN_BLENDPS256,
24108 IX86_BUILTIN_BLENDVPD256,
24109 IX86_BUILTIN_BLENDVPS256,
24110 IX86_BUILTIN_DIVPD256,
24111 IX86_BUILTIN_DIVPS256,
24112 IX86_BUILTIN_DPPS256,
24113 IX86_BUILTIN_HADDPD256,
24114 IX86_BUILTIN_HADDPS256,
24115 IX86_BUILTIN_HSUBPD256,
24116 IX86_BUILTIN_HSUBPS256,
24117 IX86_BUILTIN_MAXPD256,
24118 IX86_BUILTIN_MAXPS256,
24119 IX86_BUILTIN_MINPD256,
24120 IX86_BUILTIN_MINPS256,
24121 IX86_BUILTIN_MULPD256,
24122 IX86_BUILTIN_MULPS256,
24123 IX86_BUILTIN_ORPD256,
24124 IX86_BUILTIN_ORPS256,
24125 IX86_BUILTIN_SHUFPD256,
24126 IX86_BUILTIN_SHUFPS256,
24127 IX86_BUILTIN_SUBPD256,
24128 IX86_BUILTIN_SUBPS256,
24129 IX86_BUILTIN_XORPD256,
24130 IX86_BUILTIN_XORPS256,
24131 IX86_BUILTIN_CMPSD,
24132 IX86_BUILTIN_CMPSS,
24133 IX86_BUILTIN_CMPPD,
24134 IX86_BUILTIN_CMPPS,
24135 IX86_BUILTIN_CMPPD256,
24136 IX86_BUILTIN_CMPPS256,
24137 IX86_BUILTIN_CVTDQ2PD256,
24138 IX86_BUILTIN_CVTDQ2PS256,
24139 IX86_BUILTIN_CVTPD2PS256,
24140 IX86_BUILTIN_CVTPS2DQ256,
24141 IX86_BUILTIN_CVTPS2PD256,
24142 IX86_BUILTIN_CVTTPD2DQ256,
24143 IX86_BUILTIN_CVTPD2DQ256,
24144 IX86_BUILTIN_CVTTPS2DQ256,
24145 IX86_BUILTIN_EXTRACTF128PD256,
24146 IX86_BUILTIN_EXTRACTF128PS256,
24147 IX86_BUILTIN_EXTRACTF128SI256,
24148 IX86_BUILTIN_VZEROALL,
24149 IX86_BUILTIN_VZEROUPPER,
24150 IX86_BUILTIN_VPERMILVARPD,
24151 IX86_BUILTIN_VPERMILVARPS,
24152 IX86_BUILTIN_VPERMILVARPD256,
24153 IX86_BUILTIN_VPERMILVARPS256,
24154 IX86_BUILTIN_VPERMILPD,
24155 IX86_BUILTIN_VPERMILPS,
24156 IX86_BUILTIN_VPERMILPD256,
24157 IX86_BUILTIN_VPERMILPS256,
24158 IX86_BUILTIN_VPERMIL2PD,
24159 IX86_BUILTIN_VPERMIL2PS,
24160 IX86_BUILTIN_VPERMIL2PD256,
24161 IX86_BUILTIN_VPERMIL2PS256,
24162 IX86_BUILTIN_VPERM2F128PD256,
24163 IX86_BUILTIN_VPERM2F128PS256,
24164 IX86_BUILTIN_VPERM2F128SI256,
24165 IX86_BUILTIN_VBROADCASTSS,
24166 IX86_BUILTIN_VBROADCASTSD256,
24167 IX86_BUILTIN_VBROADCASTSS256,
24168 IX86_BUILTIN_VBROADCASTPD256,
24169 IX86_BUILTIN_VBROADCASTPS256,
24170 IX86_BUILTIN_VINSERTF128PD256,
24171 IX86_BUILTIN_VINSERTF128PS256,
24172 IX86_BUILTIN_VINSERTF128SI256,
24173 IX86_BUILTIN_LOADUPD256,
24174 IX86_BUILTIN_LOADUPS256,
24175 IX86_BUILTIN_STOREUPD256,
24176 IX86_BUILTIN_STOREUPS256,
24177 IX86_BUILTIN_LDDQU256,
24178 IX86_BUILTIN_MOVNTDQ256,
24179 IX86_BUILTIN_MOVNTPD256,
24180 IX86_BUILTIN_MOVNTPS256,
24181 IX86_BUILTIN_LOADDQU256,
24182 IX86_BUILTIN_STOREDQU256,
24183 IX86_BUILTIN_MASKLOADPD,
24184 IX86_BUILTIN_MASKLOADPS,
24185 IX86_BUILTIN_MASKSTOREPD,
24186 IX86_BUILTIN_MASKSTOREPS,
24187 IX86_BUILTIN_MASKLOADPD256,
24188 IX86_BUILTIN_MASKLOADPS256,
24189 IX86_BUILTIN_MASKSTOREPD256,
24190 IX86_BUILTIN_MASKSTOREPS256,
24191 IX86_BUILTIN_MOVSHDUP256,
24192 IX86_BUILTIN_MOVSLDUP256,
24193 IX86_BUILTIN_MOVDDUP256,
24195 IX86_BUILTIN_SQRTPD256,
24196 IX86_BUILTIN_SQRTPS256,
24197 IX86_BUILTIN_SQRTPS_NR256,
24198 IX86_BUILTIN_RSQRTPS256,
24199 IX86_BUILTIN_RSQRTPS_NR256,
24201 IX86_BUILTIN_RCPPS256,
24203 IX86_BUILTIN_ROUNDPD256,
24204 IX86_BUILTIN_ROUNDPS256,
24206 IX86_BUILTIN_FLOORPD256,
24207 IX86_BUILTIN_CEILPD256,
24208 IX86_BUILTIN_TRUNCPD256,
24209 IX86_BUILTIN_RINTPD256,
24210 IX86_BUILTIN_FLOORPS256,
24211 IX86_BUILTIN_CEILPS256,
24212 IX86_BUILTIN_TRUNCPS256,
24213 IX86_BUILTIN_RINTPS256,
24215 IX86_BUILTIN_UNPCKHPD256,
24216 IX86_BUILTIN_UNPCKLPD256,
24217 IX86_BUILTIN_UNPCKHPS256,
24218 IX86_BUILTIN_UNPCKLPS256,
24220 IX86_BUILTIN_SI256_SI,
24221 IX86_BUILTIN_PS256_PS,
24222 IX86_BUILTIN_PD256_PD,
24223 IX86_BUILTIN_SI_SI256,
24224 IX86_BUILTIN_PS_PS256,
24225 IX86_BUILTIN_PD_PD256,
24227 IX86_BUILTIN_VTESTZPD,
24228 IX86_BUILTIN_VTESTCPD,
24229 IX86_BUILTIN_VTESTNZCPD,
24230 IX86_BUILTIN_VTESTZPS,
24231 IX86_BUILTIN_VTESTCPS,
24232 IX86_BUILTIN_VTESTNZCPS,
24233 IX86_BUILTIN_VTESTZPD256,
24234 IX86_BUILTIN_VTESTCPD256,
24235 IX86_BUILTIN_VTESTNZCPD256,
24236 IX86_BUILTIN_VTESTZPS256,
24237 IX86_BUILTIN_VTESTCPS256,
24238 IX86_BUILTIN_VTESTNZCPS256,
24239 IX86_BUILTIN_PTESTZ256,
24240 IX86_BUILTIN_PTESTC256,
24241 IX86_BUILTIN_PTESTNZC256,
24243 IX86_BUILTIN_MOVMSKPD256,
24244 IX86_BUILTIN_MOVMSKPS256,
24246 /* TFmode support builtins. */
24247 IX86_BUILTIN_INFQ,
24248 IX86_BUILTIN_HUGE_VALQ,
24249 IX86_BUILTIN_FABSQ,
24250 IX86_BUILTIN_COPYSIGNQ,
24252 /* Vectorizer support builtins. */
24253 IX86_BUILTIN_CPYSGNPS,
24254 IX86_BUILTIN_CPYSGNPD,
24255 IX86_BUILTIN_CPYSGNPS256,
24256 IX86_BUILTIN_CPYSGNPD256,
24258 IX86_BUILTIN_CVTUDQ2PS,
24260 IX86_BUILTIN_VEC_PERM_V2DF,
24261 IX86_BUILTIN_VEC_PERM_V4SF,
24262 IX86_BUILTIN_VEC_PERM_V2DI,
24263 IX86_BUILTIN_VEC_PERM_V4SI,
24264 IX86_BUILTIN_VEC_PERM_V8HI,
24265 IX86_BUILTIN_VEC_PERM_V16QI,
24266 IX86_BUILTIN_VEC_PERM_V2DI_U,
24267 IX86_BUILTIN_VEC_PERM_V4SI_U,
24268 IX86_BUILTIN_VEC_PERM_V8HI_U,
24269 IX86_BUILTIN_VEC_PERM_V16QI_U,
24270 IX86_BUILTIN_VEC_PERM_V4DF,
24271 IX86_BUILTIN_VEC_PERM_V8SF,
24273 /* FMA4 and XOP instructions. */
24274 IX86_BUILTIN_VFMADDSS,
24275 IX86_BUILTIN_VFMADDSD,
24276 IX86_BUILTIN_VFMADDPS,
24277 IX86_BUILTIN_VFMADDPD,
24278 IX86_BUILTIN_VFMADDPS256,
24279 IX86_BUILTIN_VFMADDPD256,
24280 IX86_BUILTIN_VFMADDSUBPS,
24281 IX86_BUILTIN_VFMADDSUBPD,
24282 IX86_BUILTIN_VFMADDSUBPS256,
24283 IX86_BUILTIN_VFMADDSUBPD256,
24285 IX86_BUILTIN_VPCMOV,
24286 IX86_BUILTIN_VPCMOV_V2DI,
24287 IX86_BUILTIN_VPCMOV_V4SI,
24288 IX86_BUILTIN_VPCMOV_V8HI,
24289 IX86_BUILTIN_VPCMOV_V16QI,
24290 IX86_BUILTIN_VPCMOV_V4SF,
24291 IX86_BUILTIN_VPCMOV_V2DF,
24292 IX86_BUILTIN_VPCMOV256,
24293 IX86_BUILTIN_VPCMOV_V4DI256,
24294 IX86_BUILTIN_VPCMOV_V8SI256,
24295 IX86_BUILTIN_VPCMOV_V16HI256,
24296 IX86_BUILTIN_VPCMOV_V32QI256,
24297 IX86_BUILTIN_VPCMOV_V8SF256,
24298 IX86_BUILTIN_VPCMOV_V4DF256,
24300 IX86_BUILTIN_VPPERM,
24302 IX86_BUILTIN_VPMACSSWW,
24303 IX86_BUILTIN_VPMACSWW,
24304 IX86_BUILTIN_VPMACSSWD,
24305 IX86_BUILTIN_VPMACSWD,
24306 IX86_BUILTIN_VPMACSSDD,
24307 IX86_BUILTIN_VPMACSDD,
24308 IX86_BUILTIN_VPMACSSDQL,
24309 IX86_BUILTIN_VPMACSSDQH,
24310 IX86_BUILTIN_VPMACSDQL,
24311 IX86_BUILTIN_VPMACSDQH,
24312 IX86_BUILTIN_VPMADCSSWD,
24313 IX86_BUILTIN_VPMADCSWD,
24315 IX86_BUILTIN_VPHADDBW,
24316 IX86_BUILTIN_VPHADDBD,
24317 IX86_BUILTIN_VPHADDBQ,
24318 IX86_BUILTIN_VPHADDWD,
24319 IX86_BUILTIN_VPHADDWQ,
24320 IX86_BUILTIN_VPHADDDQ,
24321 IX86_BUILTIN_VPHADDUBW,
24322 IX86_BUILTIN_VPHADDUBD,
24323 IX86_BUILTIN_VPHADDUBQ,
24324 IX86_BUILTIN_VPHADDUWD,
24325 IX86_BUILTIN_VPHADDUWQ,
24326 IX86_BUILTIN_VPHADDUDQ,
24327 IX86_BUILTIN_VPHSUBBW,
24328 IX86_BUILTIN_VPHSUBWD,
24329 IX86_BUILTIN_VPHSUBDQ,
24331 IX86_BUILTIN_VPROTB,
24332 IX86_BUILTIN_VPROTW,
24333 IX86_BUILTIN_VPROTD,
24334 IX86_BUILTIN_VPROTQ,
24335 IX86_BUILTIN_VPROTB_IMM,
24336 IX86_BUILTIN_VPROTW_IMM,
24337 IX86_BUILTIN_VPROTD_IMM,
24338 IX86_BUILTIN_VPROTQ_IMM,
24340 IX86_BUILTIN_VPSHLB,
24341 IX86_BUILTIN_VPSHLW,
24342 IX86_BUILTIN_VPSHLD,
24343 IX86_BUILTIN_VPSHLQ,
24344 IX86_BUILTIN_VPSHAB,
24345 IX86_BUILTIN_VPSHAW,
24346 IX86_BUILTIN_VPSHAD,
24347 IX86_BUILTIN_VPSHAQ,
24349 IX86_BUILTIN_VFRCZSS,
24350 IX86_BUILTIN_VFRCZSD,
24351 IX86_BUILTIN_VFRCZPS,
24352 IX86_BUILTIN_VFRCZPD,
24353 IX86_BUILTIN_VFRCZPS256,
24354 IX86_BUILTIN_VFRCZPD256,
24356 IX86_BUILTIN_VPCOMEQUB,
24357 IX86_BUILTIN_VPCOMNEUB,
24358 IX86_BUILTIN_VPCOMLTUB,
24359 IX86_BUILTIN_VPCOMLEUB,
24360 IX86_BUILTIN_VPCOMGTUB,
24361 IX86_BUILTIN_VPCOMGEUB,
24362 IX86_BUILTIN_VPCOMFALSEUB,
24363 IX86_BUILTIN_VPCOMTRUEUB,
24365 IX86_BUILTIN_VPCOMEQUW,
24366 IX86_BUILTIN_VPCOMNEUW,
24367 IX86_BUILTIN_VPCOMLTUW,
24368 IX86_BUILTIN_VPCOMLEUW,
24369 IX86_BUILTIN_VPCOMGTUW,
24370 IX86_BUILTIN_VPCOMGEUW,
24371 IX86_BUILTIN_VPCOMFALSEUW,
24372 IX86_BUILTIN_VPCOMTRUEUW,
24374 IX86_BUILTIN_VPCOMEQUD,
24375 IX86_BUILTIN_VPCOMNEUD,
24376 IX86_BUILTIN_VPCOMLTUD,
24377 IX86_BUILTIN_VPCOMLEUD,
24378 IX86_BUILTIN_VPCOMGTUD,
24379 IX86_BUILTIN_VPCOMGEUD,
24380 IX86_BUILTIN_VPCOMFALSEUD,
24381 IX86_BUILTIN_VPCOMTRUEUD,
24383 IX86_BUILTIN_VPCOMEQUQ,
24384 IX86_BUILTIN_VPCOMNEUQ,
24385 IX86_BUILTIN_VPCOMLTUQ,
24386 IX86_BUILTIN_VPCOMLEUQ,
24387 IX86_BUILTIN_VPCOMGTUQ,
24388 IX86_BUILTIN_VPCOMGEUQ,
24389 IX86_BUILTIN_VPCOMFALSEUQ,
24390 IX86_BUILTIN_VPCOMTRUEUQ,
24392 IX86_BUILTIN_VPCOMEQB,
24393 IX86_BUILTIN_VPCOMNEB,
24394 IX86_BUILTIN_VPCOMLTB,
24395 IX86_BUILTIN_VPCOMLEB,
24396 IX86_BUILTIN_VPCOMGTB,
24397 IX86_BUILTIN_VPCOMGEB,
24398 IX86_BUILTIN_VPCOMFALSEB,
24399 IX86_BUILTIN_VPCOMTRUEB,
24401 IX86_BUILTIN_VPCOMEQW,
24402 IX86_BUILTIN_VPCOMNEW,
24403 IX86_BUILTIN_VPCOMLTW,
24404 IX86_BUILTIN_VPCOMLEW,
24405 IX86_BUILTIN_VPCOMGTW,
24406 IX86_BUILTIN_VPCOMGEW,
24407 IX86_BUILTIN_VPCOMFALSEW,
24408 IX86_BUILTIN_VPCOMTRUEW,
24410 IX86_BUILTIN_VPCOMEQD,
24411 IX86_BUILTIN_VPCOMNED,
24412 IX86_BUILTIN_VPCOMLTD,
24413 IX86_BUILTIN_VPCOMLED,
24414 IX86_BUILTIN_VPCOMGTD,
24415 IX86_BUILTIN_VPCOMGED,
24416 IX86_BUILTIN_VPCOMFALSED,
24417 IX86_BUILTIN_VPCOMTRUED,
24419 IX86_BUILTIN_VPCOMEQQ,
24420 IX86_BUILTIN_VPCOMNEQ,
24421 IX86_BUILTIN_VPCOMLTQ,
24422 IX86_BUILTIN_VPCOMLEQ,
24423 IX86_BUILTIN_VPCOMGTQ,
24424 IX86_BUILTIN_VPCOMGEQ,
24425 IX86_BUILTIN_VPCOMFALSEQ,
24426 IX86_BUILTIN_VPCOMTRUEQ,
24428 /* LWP instructions. */
24429 IX86_BUILTIN_LLWPCB,
24430 IX86_BUILTIN_SLWPCB,
24431 IX86_BUILTIN_LWPVAL32,
24432 IX86_BUILTIN_LWPVAL64,
24433 IX86_BUILTIN_LWPINS32,
24434 IX86_BUILTIN_LWPINS64,
24436 IX86_BUILTIN_CLZS,
24438 /* BMI instructions. */
24439 IX86_BUILTIN_BEXTR32,
24440 IX86_BUILTIN_BEXTR64,
24441 IX86_BUILTIN_CTZS,
24443 /* TBM instructions. */
24444 IX86_BUILTIN_BEXTRI32,
24445 IX86_BUILTIN_BEXTRI64,
24448 /* FSGSBASE instructions. */
24449 IX86_BUILTIN_RDFSBASE32,
24450 IX86_BUILTIN_RDFSBASE64,
24451 IX86_BUILTIN_RDGSBASE32,
24452 IX86_BUILTIN_RDGSBASE64,
24453 IX86_BUILTIN_WRFSBASE32,
24454 IX86_BUILTIN_WRFSBASE64,
24455 IX86_BUILTIN_WRGSBASE32,
24456 IX86_BUILTIN_WRGSBASE64,
24458 /* RDRND instructions. */
24459 IX86_BUILTIN_RDRAND16_STEP,
24460 IX86_BUILTIN_RDRAND32_STEP,
24461 IX86_BUILTIN_RDRAND64_STEP,
24463 /* F16C instructions. */
24464 IX86_BUILTIN_CVTPH2PS,
24465 IX86_BUILTIN_CVTPH2PS256,
24466 IX86_BUILTIN_CVTPS2PH,
24467 IX86_BUILTIN_CVTPS2PH256,
24469 /* CFString built-in for darwin */
24470 IX86_BUILTIN_CFSTRING,
24472 IX86_BUILTIN_MAX
24475 /* Table for the ix86 builtin decls. */
24476 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
24478 /* Table of all of the builtin functions that are possible with different ISAs
24479    but are not built until a function that uses that ISA is
24480    declared.  */
24481 struct builtin_isa {
24482 const char *name; /* function name */
24483 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
24484 int isa; /* isa_flags this builtin is defined for */
24485 bool const_p; /* true if the declaration is constant */
24486 bool set_and_not_built_p;
24489 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
24492 /* Add an ix86 target builtin function with CODE, NAME and type TCODE.  Save
24493    MASK, the set of isa_flags the builtin needs, in the ix86_builtins_isa
24494    array.  Store the function decl in the ix86_builtins array.  Return the
24495    function decl, or NULL_TREE if the builtin was not added.
24497    If the front end has a special hook for builtin functions, delay adding
24498    builtin functions that are not in the current ISA until the ISA is changed
24499    with function-specific optimization.  Doing so can save about 300K for the
24500    default compiler.  When the builtin is expanded, check at that time whether
24501    it is valid.
24503    If the front end doesn't have a special hook, record all builtins, even
24504    those not in the current ISA, in case the user uses function-specific
24505    options for a different ISA; that way we do not get scope errors if a
24506    builtin is added in the middle of a function scope.  */
24508 static inline tree
24509 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
24510 enum ix86_builtins code)
24512 tree decl = NULL_TREE;
24514 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
24516 ix86_builtins_isa[(int) code].isa = mask;
24518 mask &= ~OPTION_MASK_ISA_64BIT;
24519 if (mask == 0
24520 || (mask & ix86_isa_flags) != 0
24521 || (lang_hooks.builtin_function
24522 == lang_hooks.builtin_function_ext_scope))
24525 tree type = ix86_get_builtin_func_type (tcode);
24526 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
24527 NULL, NULL_TREE);
24528 ix86_builtins[(int) code] = decl;
24529 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
24531 else
24533 ix86_builtins[(int) code] = NULL_TREE;
24534 ix86_builtins_isa[(int) code].tcode = tcode;
24535 ix86_builtins_isa[(int) code].name = name;
24536 ix86_builtins_isa[(int) code].const_p = false;
24537 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
24541 return decl;
24544 /* Like def_builtin, but also marks the function decl "const". */
24546 static inline tree
24547 def_builtin_const (int mask, const char *name,
24548 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
24550 tree decl = def_builtin (mask, name, tcode, code);
24551 if (decl)
24552 TREE_READONLY (decl) = 1;
24553 else
24554 ix86_builtins_isa[(int) code].const_p = true;
24556 return decl;
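/* Illustrative usage, not part of the original file: the builtin
   registrations later in this file call these helpers roughly as in the
   sketch below.  The builtin names here are made up; the masks, type
   codes and enum values are examples only.  */
#if 0
static void
example_register_builtins (void)
{
  /* Declared eagerly when SSE is enabled globally, or recorded and
     built later by ix86_add_new_builtins when SSE is only enabled via
     function-specific options.  */
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_example_addps",
		     V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);

  /* A builtin with side effects is registered without the "const"
     marking.  */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_example_sfence",
	       VOID_FTYPE_VOID, IX86_BUILTIN_SFENCE);
}
#endif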
24559 /* Add any new builtin functions for a given ISA that may not have been
24560    declared.  This saves a bit of space compared to adding all of the
24561    declarations to the tree, even when they are not used.  */
24563 static void
24564 ix86_add_new_builtins (int isa)
24566 int i;
24568 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
24570 if ((ix86_builtins_isa[i].isa & isa) != 0
24571 && ix86_builtins_isa[i].set_and_not_built_p)
24573 tree decl, type;
24575 /* Don't define the builtin again. */
24576 ix86_builtins_isa[i].set_and_not_built_p = false;
24578 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
24579 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
24580 type, i, BUILT_IN_MD, NULL,
24581 NULL_TREE);
24583 ix86_builtins[i] = decl;
24584 if (ix86_builtins_isa[i].const_p)
24585 TREE_READONLY (decl) = 1;
24590 /* Bits for builtin_description.flag. */
24592 /* Set when we don't support the comparison natively, and should
24593 swap_comparison in order to support it. */
24594 #define BUILTIN_DESC_SWAP_OPERANDS 1
24596 struct builtin_description
24598 const unsigned int mask;
24599 const enum insn_code icode;
24600 const char *const name;
24601 const enum ix86_builtins code;
24602 const enum rtx_code comparison;
24603 const int flag;
24606 static const struct builtin_description bdesc_comi[] =
24608 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
24609 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
24610 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
24611 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
24612 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
24613 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
24614 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
24615 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
24616 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
24617 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
24618 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
24619 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
24620 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
24621 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
24622 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
24623 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
24624 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
24625 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
24626 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
24627 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
24628 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
24629 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
24630 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
24631 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
24634 static const struct builtin_description bdesc_pcmpestr[] =
24636 /* SSE4.2 */
24637 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
24638 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
24639 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
24640 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
24641 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
24642 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
24643 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
24646 static const struct builtin_description bdesc_pcmpistr[] =
24648 /* SSE4.2 */
24649 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
24650 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
24651 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
24652 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
24653 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
24654 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
24655 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
24658 /* Special builtins with variable number of arguments. */
24659 static const struct builtin_description bdesc_special_args[] =
24661 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
24662 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
24663 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
24665 /* MMX */
24666 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
24668 /* 3DNow! */
24669 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
24671 /* SSE */
24672 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24673 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24674 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
24676 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
24677 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
24678 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
24679 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
24681 /* SSE or 3DNow!A */
24682 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24683 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
24685 /* SSE2 */
24686 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24687 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24688 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24689 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
24690 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24691 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
24692 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
24693 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
24694 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
24696 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
24697 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
24699 /* SSE3 */
24700 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
24702 /* SSE4.1 */
24703 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
24705 /* SSE4A */
24706 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24707 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24709 /* AVX */
24710 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
24711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
24713 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
24714 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
24715 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
24716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
24717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
24719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
24720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
24721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
24722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
24723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
24724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
24725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
24727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
24728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
24729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
24731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
24732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
24733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
24734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
24735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
24736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
24737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
24738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
24740 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
24741 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
24742 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
24743 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
24744 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
24745 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
24747 /* FSGSBASE */
24748 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
24749 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
24750 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
24751 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
24752 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
24753 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
24754 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
24755 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
24758 /* Builtins with variable number of arguments. */
24759 static const struct builtin_description bdesc_args[] =
24761 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
24762 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
24763 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
24764 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24765 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24766 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24767 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24769 /* MMX */
24770 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24771 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24772 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24773 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24774 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24775 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24777 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24778 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24779 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24780 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24781 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24782 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24783 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24784 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24786 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24787 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24789 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24790 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24791 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24792 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24794 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24795 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24796 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24797 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24798 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24799 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24801 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24802 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24803 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24804 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24805 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24806 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24808 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24809 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
24810 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24812 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
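/* Editorial note (inferred from the prototypes): the _COUNT suffix marks the
   shift builtins, whose last operand is the shift count; the *i forms take it
   as an integer, the others as a vector register.  */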
24814 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24815 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24816 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24817 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24818 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24819 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24821 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24822 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24823 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24824 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24825 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24826 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24828 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24829 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24830 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24831 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24833 /* 3DNow! */
24834 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24835 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24836 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24837 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24839 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24840 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24841 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24842 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24843 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24844 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24845 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24846 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24847 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24848 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24849 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24850 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24851 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24852 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24853 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24855 /* 3DNow!A */
24856 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24857 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24858 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
24859 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24860 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24861 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24863 /* SSE */
24864 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
24865 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24866 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24867 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24868 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24869 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24870 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24871 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24872 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24873 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24874 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24875 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24877 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24879 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24880 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24881 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24882 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24883 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24884 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24885 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24886 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
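/* Editorial note (inferred from the entries below): for the cmp* builtins the
   rtx code in the fifth field selects the comparison, and prototypes ending in
   _SWAP mark entries whose two operands are exchanged before the comparison
   (e.g. cmpgt is expanded as LT with swapped operands).  */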
24888 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24889 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24890 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24891 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24892 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24893 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24894 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24895 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24896 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24897 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24898 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
24899 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24900 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24901 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24902 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24903 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24904 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24905 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24906 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24907 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24908 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24909 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24911 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24912 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24913 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24914 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24916 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24917 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24918 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24919 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24921 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24923 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24924 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24925 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24926 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24927 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24929 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
24930 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
24931 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
24933 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
24935 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24936 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24937 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24939 /* SSE MMX or 3DNow!A */
24940 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24941 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24942 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24944 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24945 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24946 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24947 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24949 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
24950 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
24952 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
24954 /* SSE2 */
24955 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24957 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
24958 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
24959 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
24960 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
24961 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
24962 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
24963 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
24964 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
24965 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
24966 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
24967 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
24968 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
24970 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
24971 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
24972 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
24973 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
24974 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24975 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24977 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24978 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24979 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
24980 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24981 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24983 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
24985 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24986 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24987 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24988 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24990 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24991 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
24992 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24994 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24995 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24996 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24997 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24998 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24999 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25000 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25001 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25003 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
25004 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
25005 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
25006 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
25007 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
25008 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25009 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
25010 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
25011 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
25012 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
25013 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
25014 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25015 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
25016 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
25017 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
25018 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25019 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
25020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
25021 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
25022 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25024 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25025 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25026 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25027 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25029 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25030 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25031 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25032 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25034 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25036 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25037 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25038 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25040 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
25042 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25043 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25044 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25045 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25046 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25047 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25048 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25049 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25051 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25052 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25053 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25054 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25055 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25056 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25057 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25058 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25060 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25061 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25063 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25064 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25065 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25066 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25068 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25069 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25071 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25072 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25073 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25074 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25075 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25076 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25078 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25079 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25080 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25081 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25083 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25084 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25085 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25086 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25087 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25088 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25089 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25090 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25092 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
25093 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
25094 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
25096 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25097 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
25099 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
25100 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
25102 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
25104 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
25105 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
25106 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
25107 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
25109 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
25110 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25111 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25112 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
25113 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25114 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25115 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
25117 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
25118 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25119 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25120 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
25121 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25122 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25123 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
25125 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25126 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25127 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25128 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25130 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
25131 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
25132 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
25134 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
25136 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
25137 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
25139 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
25141 /* SSE2 MMX */
25142 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
25143 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
25145 /* SSE3 */
25146 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25147 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25149 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25150 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25151 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25152 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25153 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25154 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25156 /* SSSE3 */
25157 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
25158 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
25159 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
25160 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
25161 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
25162 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
25164 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25165 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25166 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25167 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25168 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25169 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25170 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25171 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25172 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25173 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25174 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25175 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25176 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
25177 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
25178 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25179 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25180 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25181 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25182 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25183 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25184 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25185 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25186 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25187 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25189 /* SSSE3. */
25190 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
25191 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
25193 /* SSE4.1 */
25194 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25195 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25196 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
25197 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
25198 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25199 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25200 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25201 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
25202 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
25203 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
25205 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
25206 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
25207 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
25208 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
25209 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
25210 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
25211 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
25212 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
25213 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
25214 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
25215 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
25216 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
25217 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
25219 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
25220 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25221 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25222 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25223 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25224 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25225 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25226 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25227 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25228 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25229 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
25230 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25232 /* SSE4.1 */
25233 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
25234 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
25235 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25236 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
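/* Editorial note (inferred from the entries below): the floor/ceil/trunc/rint
   variants reuse the sse4_1_round* patterns; the ROUND_* rounding-mode
   constant is carried in the comparison field, cast to enum rtx_code.  */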
25238 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
25239 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
25240 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
25241 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
25243 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
25244 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
25245 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
25246 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
25248 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
25249 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
25250 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
25252 /* SSE4.2 */
25253 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25254 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
25255 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
25256 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25257 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25259 /* SSE4A */
25260 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
25261 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
25262 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
25263 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25265 /* AES */
25266 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
25267 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
25269 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25270 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25271 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25272 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25274 /* PCLMUL */
25275 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
25277 /* AVX */
25278 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25279 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25280 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25281 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25282 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25283 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25284 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25285 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25286 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25287 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25288 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25289 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25290 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25291 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25292 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25293 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25294 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25295 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25296 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25297 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25298 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25299 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25300 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25301 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25302 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25303 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25305 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
25306 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
25307 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
25308 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
25310 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25311 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25312 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
25313 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
25314 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25315 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25316 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25317 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25318 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25319 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25320 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25321 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25322 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25323 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
25324 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
25325 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
25326 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
25327 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
25328 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
25329 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
25330 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
25331 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
25332 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
25333 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
25334 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25335 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25336 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
25337 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
25338 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
25339 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
25340 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
25341 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
25342 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
25343 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
25345 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25346 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25347 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
25349 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
25350 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25351 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25352 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25353 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25355 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25357 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
25358 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
25360 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
25361 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
25362 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
25363 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
25365 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
25366 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
25367 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
25368 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
25370 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25371 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25372 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25373 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25375 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
25376 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
25377 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
25378 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
25379 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
25380 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
25382 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
25383 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
25384 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
25385 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
25386 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
25387 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
25388 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
25389 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
25390 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
25391 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
25392 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
25393 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
25394 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
25395 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
25396 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
25398 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
25399 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
25401 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25402 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25404 { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
25406 /* BMI */
25407 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25408 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25409 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
25411 /* TBM */
25412 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25413 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25415 /* F16C */
25416 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
25417 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
25418 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
25419 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
25422 /* FMA4 and XOP. */
25423 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
25424 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
25425 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
25426 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
25427 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
25428 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
25429 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
25430 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
25431 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
25432 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
25433 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
25434 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
25435 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
25436 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
25437 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
25438 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
25439 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
25440 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
25441 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
25442 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
25443 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
25444 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
25445 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
25446 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
25447 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
25448 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
25449 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
25450 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
25451 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
25452 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
25453 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
25454 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
25455 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
25456 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
25457 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
25458 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
25459 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
25460 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
25461 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
25462 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
25463 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
25464 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
25465 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
25466 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
25467 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
25468 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
25469 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
25470 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
25471 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
25472 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
25473 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
25474 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
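/* The MULTI_ARG_* names above are shorthand aliases for ix86_builtin_func_type
   values used in the bdesc_multi_arg table below; ix86_expand_multi_arg_builtin
   later switches on the same names to recover the operand count and whether the
   last operand is an immediate (_IMM), a comparison code (_CMP) or an
   always-true/always-false pcom variant (_TF).  */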
25476 static const struct builtin_description bdesc_multi_arg[] =
25477 {
25478 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
25479 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
25480 UNKNOWN, (int)MULTI_ARG_3_SF },
25481 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
25482 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
25483 UNKNOWN, (int)MULTI_ARG_3_DF },
25485 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
25486 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
25487 UNKNOWN, (int)MULTI_ARG_3_SF },
25488 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
25489 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
25490 UNKNOWN, (int)MULTI_ARG_3_DF },
25491 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
25492 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
25493 UNKNOWN, (int)MULTI_ARG_3_SF2 },
25494 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
25495 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
25496 UNKNOWN, (int)MULTI_ARG_3_DF2 },
25498 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
25499 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
25500 UNKNOWN, (int)MULTI_ARG_3_SF },
25501 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
25502 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
25503 UNKNOWN, (int)MULTI_ARG_3_DF },
25504 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
25505 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
25506 UNKNOWN, (int)MULTI_ARG_3_SF2 },
25507 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
25508 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
25509 UNKNOWN, (int)MULTI_ARG_3_DF2 },
25511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
25512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
25513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
25514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
25515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
25516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
25517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
25519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
25520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
25521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
25522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
25523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
25524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
25525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
25527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
25529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
25530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
25531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
25534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
25535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
25543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
25544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
25545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
25546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
25547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
25548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
25549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
25550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
25551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
25552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
25553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
25554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
25555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
25556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
25557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
25559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
25560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
25561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
25562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
25563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
25564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
25566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
25567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
25568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
25569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
25570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
25571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
25573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
25574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
25575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
25576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
25577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
25579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
25580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
25583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
25584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
25585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
25586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
25587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
25588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
25590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
25591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
25592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
25593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
25594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
25595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
25596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
25598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
25599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
25602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
25603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
25604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
25606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
25610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
25611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
25612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
25614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
25615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25617 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
25618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
25619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
25620 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
25622 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
25623 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25624 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25625 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
25626 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
25627 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
25628 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
25630 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
25631 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25632 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25633 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
25634 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
25635 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
25636 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
25638 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25639 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25640 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25641 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
25642 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
25643 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
25644 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
25646 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25647 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25648 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25649 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25650 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25651 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25652 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25653 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25655 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25656 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25657 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25658 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25659 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25660 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25661 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25662 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25664 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
25665 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
25666 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
25667 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
25668 };
25671 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
25672    not in the current target ISA, so that the user can compile particular
25673    modules with target-specific options that differ from the command-line
25674    options.  */
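/* Note the split below: entries from bdesc_special_args are registered with
   def_builtin, while the bdesc_args, pcmpestr/pcmpistr, comi and multi-arg
   tables go through def_builtin_const; the latter presumably marks the
   resulting decls as having no side effects so calls to them can be CSEd.  */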
25675 static void
25676 ix86_init_mmx_sse_builtins (void)
25677 {
25678 const struct builtin_description * d;
25679 enum ix86_builtin_func_type ftype;
25680 size_t i;
25682 /* Add all special builtins with variable number of operands. */
25683 for (i = 0, d = bdesc_special_args;
25684 i < ARRAY_SIZE (bdesc_special_args);
25685 i++, d++)
25686 {
25687 if (d->name == 0)
25688 continue;
25690 ftype = (enum ix86_builtin_func_type) d->flag;
25691 def_builtin (d->mask, d->name, ftype, d->code);
25692 }
25694 /* Add all builtins with variable number of operands. */
25695 for (i = 0, d = bdesc_args;
25696 i < ARRAY_SIZE (bdesc_args);
25697 i++, d++)
25698 {
25699 if (d->name == 0)
25700 continue;
25702 ftype = (enum ix86_builtin_func_type) d->flag;
25703 def_builtin_const (d->mask, d->name, ftype, d->code);
25704 }
25706 /* pcmpestr[im] insns. */
25707 for (i = 0, d = bdesc_pcmpestr;
25708 i < ARRAY_SIZE (bdesc_pcmpestr);
25709 i++, d++)
25710 {
25711 if (d->code == IX86_BUILTIN_PCMPESTRM128)
25712 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
25713 else
25714 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
25715 def_builtin_const (d->mask, d->name, ftype, d->code);
25716 }
25718 /* pcmpistr[im] insns. */
25719 for (i = 0, d = bdesc_pcmpistr;
25720 i < ARRAY_SIZE (bdesc_pcmpistr);
25721 i++, d++)
25722 {
25723 if (d->code == IX86_BUILTIN_PCMPISTRM128)
25724 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
25725 else
25726 ftype = INT_FTYPE_V16QI_V16QI_INT;
25727 def_builtin_const (d->mask, d->name, ftype, d->code);
25728 }
25730 /* comi/ucomi insns. */
25731 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25732 {
25733 if (d->mask == OPTION_MASK_ISA_SSE2)
25734 ftype = INT_FTYPE_V2DF_V2DF;
25735 else
25736 ftype = INT_FTYPE_V4SF_V4SF;
25737 def_builtin_const (d->mask, d->name, ftype, d->code);
25738 }
25740 /* SSE */
25741 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
25742 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
25743 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
25744 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
25746 /* SSE or 3DNow!A */
25747 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25748 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
25749 IX86_BUILTIN_MASKMOVQ);
25751 /* SSE2 */
25752 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
25753 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
25755 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
25756 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
25757 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
25758 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
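/* The __builtin_ia32_mfence decl is remembered in x86_mfence; other parts of
   the backend presumably reuse that decl when a full memory barrier has to be
   emitted.  */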
25760 /* SSE3. */
25761 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
25762 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
25763 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
25764 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
25766 /* AES */
25767 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
25768 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
25769 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
25770 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
25771 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
25772 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
25773 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
25774 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
25775 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
25776 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
25777 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
25778 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
25780 /* PCLMUL */
25781 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
25782 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
25784 /* RDRND */
25785 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
25786 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
25787 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
25788 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
25789 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
25790 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
25791 IX86_BUILTIN_RDRAND64_STEP);
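/* Illustrative use of the *_step builtins (a sketch, not code from this file):
     unsigned int val;
     if (__builtin_ia32_rdrand32_step (&val))
       ... use val ...;
   the int return value reflects the CPU's carry flag, i.e. whether a random
   number was actually delivered into *val.  */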
25793 /* MMX access to the vec_init patterns. */
25794 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
25795 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
25797 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
25798 V4HI_FTYPE_HI_HI_HI_HI,
25799 IX86_BUILTIN_VEC_INIT_V4HI);
25801 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
25802 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
25803 IX86_BUILTIN_VEC_INIT_V8QI);
25805 /* Access to the vec_extract patterns. */
25806 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
25807 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
25808 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
25809 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
25810 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
25811 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
25812 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
25813 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
25814 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
25815 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
25817 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25818 "__builtin_ia32_vec_ext_v4hi",
25819 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
25821 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
25822 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
25824 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
25825 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
25827 /* Access to the vec_set patterns. */
25828 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
25829 "__builtin_ia32_vec_set_v2di",
25830 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
25832 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
25833 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
25835 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
25836 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
25838 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
25839 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
25841 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25842 "__builtin_ia32_vec_set_v4hi",
25843 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
25845 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
25846 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
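/* The vec_init/vec_ext/vec_set builtins above back the element-access
   intrinsics in the *mmintrin.h headers; for instance _mm_extract_pi16 is
   presumably implemented on top of __builtin_ia32_vec_ext_v4hi.  */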
25848 /* Add FMA4 and XOP multi-arg builtins.  */
25849 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25850 {
25851 if (d->name == 0)
25852 continue;
25854 ftype = (enum ix86_builtin_func_type) d->flag;
25855 def_builtin_const (d->mask, d->name, ftype, d->code);
25856 }
25857 }
25859 /* Internal method for ix86_init_builtins. */
25861 static void
25862 ix86_init_builtins_va_builtins_abi (void)
25863 {
25864 tree ms_va_ref, sysv_va_ref;
25865 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
25866 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
25867 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
25868 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
25870 if (!TARGET_64BIT)
25871 return;
25872 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
25873 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
25874 ms_va_ref = build_reference_type (ms_va_list_type_node);
25875 sysv_va_ref =
25876 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
25878 fnvoid_va_end_ms =
25879 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25880 fnvoid_va_start_ms =
25881 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25882 fnvoid_va_end_sysv =
25883 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
25884 fnvoid_va_start_sysv =
25885 build_varargs_function_type_list (void_type_node, sysv_va_ref,
25886 NULL_TREE);
25887 fnvoid_va_copy_ms =
25888 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
25889 NULL_TREE);
25890 fnvoid_va_copy_sysv =
25891 build_function_type_list (void_type_node, sysv_va_ref,
25892 sysv_va_ref, NULL_TREE);
25894 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
25895 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
25896 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
25897 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
25898 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
25899 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
25900 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
25901 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25902 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
25903 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25904 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
25905 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25906 }
25908 static void
25909 ix86_init_builtin_types (void)
25910 {
25911 tree float128_type_node, float80_type_node;
25913 /* The __float80 type. */
25914 float80_type_node = long_double_type_node;
25915 if (TYPE_MODE (float80_type_node) != XFmode)
25916 {
25917 /* long double is not XFmode here, so make a separate 80-bit type.  */
25918 float80_type_node = make_node (REAL_TYPE);
25920 TYPE_PRECISION (float80_type_node) = 80;
25921 layout_type (float80_type_node);
25922 }
25923 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
25925 /* The __float128 type. */
25926 float128_type_node = make_node (REAL_TYPE);
25927 TYPE_PRECISION (float128_type_node) = 128;
25928 layout_type (float128_type_node);
25929 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
25931 /* This macro is built by i386-builtin-types.awk. */
25932 DEFINE_BUILTIN_PRIMITIVE_TYPES;
25933 }
25935 static void
25936 ix86_init_builtins (void)
25937 {
25938 tree t;
25940 ix86_init_builtin_types ();
25942 /* TFmode support builtins. */
25943 def_builtin_const (0, "__builtin_infq",
25944 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
25945 def_builtin_const (0, "__builtin_huge_valq",
25946 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
25948 /* We will expand them to a normal call if SSE2 isn't available, since
25949    they are used by libgcc.  */
25950 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
25951 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
25952 BUILT_IN_MD, "__fabstf2", NULL_TREE);
25953 TREE_READONLY (t) = 1;
25954 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
25956 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
25957 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
25958 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
25959 TREE_READONLY (t) = 1;
25960 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
25962 ix86_init_mmx_sse_builtins ();
25964 if (TARGET_64BIT)
25965 ix86_init_builtins_va_builtins_abi ();
25967 #ifdef SUBTARGET_INIT_BUILTINS
25968 SUBTARGET_INIT_BUILTINS;
25969 #endif
25970 }
25972 /* Return the ix86 builtin for CODE. */
25974 static tree
25975 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25976 {
25977 if (code >= IX86_BUILTIN_MAX)
25978 return error_mark_node;
25980 return ix86_builtins[code];
25981 }
25983 /* Errors in the source file can cause expand_expr to return const0_rtx
25984 where we expect a vector. To avoid crashing, use one of the vector
25985 clear instructions. */
25986 static rtx
25987 safe_vector_operand (rtx x, enum machine_mode mode)
25988 {
25989 if (x == const0_rtx)
25990 x = CONST0_RTX (mode);
25991 return x;
25992 }
25994 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
25996 static rtx
25997 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
25998 {
25999 rtx pat;
26000 tree arg0 = CALL_EXPR_ARG (exp, 0);
26001 tree arg1 = CALL_EXPR_ARG (exp, 1);
26002 rtx op0 = expand_normal (arg0);
26003 rtx op1 = expand_normal (arg1);
26004 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26005 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
26006 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
26008 if (VECTOR_MODE_P (mode0))
26009 op0 = safe_vector_operand (op0, mode0);
26010 if (VECTOR_MODE_P (mode1))
26011 op1 = safe_vector_operand (op1, mode1);
26013 if (optimize || !target
26014 || GET_MODE (target) != tmode
26015 || !insn_data[icode].operand[0].predicate (target, tmode))
26016 target = gen_reg_rtx (tmode);
26018 if (GET_MODE (op1) == SImode && mode1 == TImode)
26020 rtx x = gen_reg_rtx (V4SImode);
26021 emit_insn (gen_sse2_loadd (x, op1));
26022 op1 = gen_lowpart (TImode, x);
26025 if (!insn_data[icode].operand[1].predicate (op0, mode0))
26026 op0 = copy_to_mode_reg (mode0, op0);
26027 if (!insn_data[icode].operand[2].predicate (op1, mode1))
26028 op1 = copy_to_mode_reg (mode1, op1);
26030 pat = GEN_FCN (icode) (target, op0, op1);
26031 if (! pat)
26032 return 0;
26034 emit_insn (pat);
26036 return target;
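/* Example flow for a plain two-operand builtin, assuming the usual
   xmmintrin.h mapping:

       _mm_add_ps (a, b)  ==>  __builtin_ia32_addps (a, b)

   ix86_expand_args_builtin sees V4SF_FTYPE_V4SF_V4SF with an UNKNOWN
   comparison and hands the call to this helper, which forces the operands
   to satisfy the insn's predicates and emits the SSE add pattern.  */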
26039 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
26041 static rtx
26042 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
26043 enum ix86_builtin_func_type m_type,
26044 enum rtx_code sub_code)
26046 rtx pat;
26047 int i;
26048 int nargs;
26049 bool comparison_p = false;
26050 bool tf_p = false;
26051 bool last_arg_constant = false;
26052 int num_memory = 0;
26053 struct {
26054 rtx op;
26055 enum machine_mode mode;
26056 } args[4];
26058 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26060 switch (m_type)
26062 case MULTI_ARG_4_DF2_DI_I:
26063 case MULTI_ARG_4_DF2_DI_I1:
26064 case MULTI_ARG_4_SF2_SI_I:
26065 case MULTI_ARG_4_SF2_SI_I1:
26066 nargs = 4;
26067 last_arg_constant = true;
26068 break;
26070 case MULTI_ARG_3_SF:
26071 case MULTI_ARG_3_DF:
26072 case MULTI_ARG_3_SF2:
26073 case MULTI_ARG_3_DF2:
26074 case MULTI_ARG_3_DI:
26075 case MULTI_ARG_3_SI:
26076 case MULTI_ARG_3_SI_DI:
26077 case MULTI_ARG_3_HI:
26078 case MULTI_ARG_3_HI_SI:
26079 case MULTI_ARG_3_QI:
26080 case MULTI_ARG_3_DI2:
26081 case MULTI_ARG_3_SI2:
26082 case MULTI_ARG_3_HI2:
26083 case MULTI_ARG_3_QI2:
26084 nargs = 3;
26085 break;
26087 case MULTI_ARG_2_SF:
26088 case MULTI_ARG_2_DF:
26089 case MULTI_ARG_2_DI:
26090 case MULTI_ARG_2_SI:
26091 case MULTI_ARG_2_HI:
26092 case MULTI_ARG_2_QI:
26093 nargs = 2;
26094 break;
26096 case MULTI_ARG_2_DI_IMM:
26097 case MULTI_ARG_2_SI_IMM:
26098 case MULTI_ARG_2_HI_IMM:
26099 case MULTI_ARG_2_QI_IMM:
26100 nargs = 2;
26101 last_arg_constant = true;
26102 break;
26104 case MULTI_ARG_1_SF:
26105 case MULTI_ARG_1_DF:
26106 case MULTI_ARG_1_SF2:
26107 case MULTI_ARG_1_DF2:
26108 case MULTI_ARG_1_DI:
26109 case MULTI_ARG_1_SI:
26110 case MULTI_ARG_1_HI:
26111 case MULTI_ARG_1_QI:
26112 case MULTI_ARG_1_SI_DI:
26113 case MULTI_ARG_1_HI_DI:
26114 case MULTI_ARG_1_HI_SI:
26115 case MULTI_ARG_1_QI_DI:
26116 case MULTI_ARG_1_QI_SI:
26117 case MULTI_ARG_1_QI_HI:
26118 nargs = 1;
26119 break;
26121 case MULTI_ARG_2_DI_CMP:
26122 case MULTI_ARG_2_SI_CMP:
26123 case MULTI_ARG_2_HI_CMP:
26124 case MULTI_ARG_2_QI_CMP:
26125 nargs = 2;
26126 comparison_p = true;
26127 break;
26129 case MULTI_ARG_2_SF_TF:
26130 case MULTI_ARG_2_DF_TF:
26131 case MULTI_ARG_2_DI_TF:
26132 case MULTI_ARG_2_SI_TF:
26133 case MULTI_ARG_2_HI_TF:
26134 case MULTI_ARG_2_QI_TF:
26135 nargs = 2;
26136 tf_p = true;
26137 break;
26139 default:
26140 gcc_unreachable ();
26143 if (optimize || !target
26144 || GET_MODE (target) != tmode
26145 || !insn_data[icode].operand[0].predicate (target, tmode))
26146 target = gen_reg_rtx (tmode);
26148 gcc_assert (nargs <= 4);
26150 for (i = 0; i < nargs; i++)
26152 tree arg = CALL_EXPR_ARG (exp, i);
26153 rtx op = expand_normal (arg);
26154 int adjust = (comparison_p) ? 1 : 0;
26155 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
26157 if (last_arg_constant && i == nargs-1)
26159 if (!CONST_INT_P (op))
26161 error ("last argument must be an immediate");
26162 return gen_reg_rtx (tmode);
26165 else
26167 if (VECTOR_MODE_P (mode))
26168 op = safe_vector_operand (op, mode);
26170 /* If we aren't optimizing, only allow one memory operand to be
26171 generated. */
26172 if (memory_operand (op, mode))
26173 num_memory++;
26175 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
26177 if (optimize
26178 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
26179 || num_memory > 1)
26180 op = force_reg (mode, op);
26183 args[i].op = op;
26184 args[i].mode = mode;
26187 switch (nargs)
26189 case 1:
26190 pat = GEN_FCN (icode) (target, args[0].op);
26191 break;
26193 case 2:
26194 if (tf_p)
26195 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
26196 GEN_INT ((int)sub_code));
26197 else if (! comparison_p)
26198 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26199 else
26201 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
26202 args[0].op,
26203 args[1].op);
26205 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
26207 break;
26209 case 3:
26210 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26211 break;
26213 case 4:
26214 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
26215 break;
26217 default:
26218 gcc_unreachable ();
26221 if (! pat)
26222 return 0;
26224 emit_insn (pat);
26225 return target;
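/* Example, assuming the XOP intrinsic mapping in xopintrin.h:

       _mm_macc_epi32 (a, b, c)  ==>  __builtin_ia32_vpmacsdd (a, b, c)

   arrives here as a three-operand (MULTI_ARG_3_*) entry.  The *_TF and
   *_CMP variants additionally use SUB_CODE: as an immediate condition
   code for the TF (condition-in-last-operand) forms, or as the rtx
   comparison wrapped around the operands for the CMP forms.  */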
26228 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
26229 insns with vec_merge. */
26231 static rtx
26232 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
26233 rtx target)
26235 rtx pat;
26236 tree arg0 = CALL_EXPR_ARG (exp, 0);
26237 rtx op1, op0 = expand_normal (arg0);
26238 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26239 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
26241 if (optimize || !target
26242 || GET_MODE (target) != tmode
26243 || !insn_data[icode].operand[0].predicate (target, tmode))
26244 target = gen_reg_rtx (tmode);
26246 if (VECTOR_MODE_P (mode0))
26247 op0 = safe_vector_operand (op0, mode0);
26249 if ((optimize && !register_operand (op0, mode0))
26250 || !insn_data[icode].operand[1].predicate (op0, mode0))
26251 op0 = copy_to_mode_reg (mode0, op0);
26253 op1 = op0;
26254 if (!insn_data[icode].operand[2].predicate (op1, mode0))
26255 op1 = copy_to_mode_reg (mode0, op1);
26257 pat = GEN_FCN (icode) (target, op0, op1);
26258 if (! pat)
26259 return 0;
26260 emit_insn (pat);
26261 return target;
26264 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
26266 static rtx
26267 ix86_expand_sse_compare (const struct builtin_description *d,
26268 tree exp, rtx target, bool swap)
26270 rtx pat;
26271 tree arg0 = CALL_EXPR_ARG (exp, 0);
26272 tree arg1 = CALL_EXPR_ARG (exp, 1);
26273 rtx op0 = expand_normal (arg0);
26274 rtx op1 = expand_normal (arg1);
26275 rtx op2;
26276 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
26277 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
26278 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
26279 enum rtx_code comparison = d->comparison;
26281 if (VECTOR_MODE_P (mode0))
26282 op0 = safe_vector_operand (op0, mode0);
26283 if (VECTOR_MODE_P (mode1))
26284 op1 = safe_vector_operand (op1, mode1);
26286 /* Swap operands if we have a comparison that isn't available in
26287 hardware. */
26288 if (swap)
26290 rtx tmp = gen_reg_rtx (mode1);
26291 emit_move_insn (tmp, op1);
26292 op1 = op0;
26293 op0 = tmp;
26296 if (optimize || !target
26297 || GET_MODE (target) != tmode
26298 || !insn_data[d->icode].operand[0].predicate (target, tmode))
26299 target = gen_reg_rtx (tmode);
26301 if ((optimize && !register_operand (op0, mode0))
26302 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
26303 op0 = copy_to_mode_reg (mode0, op0);
26304 if ((optimize && !register_operand (op1, mode1))
26305 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
26306 op1 = copy_to_mode_reg (mode1, op1);
26308 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
26309 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
26310 if (! pat)
26311 return 0;
26312 emit_insn (pat);
26313 return target;
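/* Example, assuming the xmmintrin.h mapping:

       _mm_cmplt_ss (a, b)  ==>  __builtin_ia32_cmpltss (a, b)

   Builtin table entries tagged ..._SWAP reach this helper with SWAP set,
   so comparisons that hardware only provides in one operand order are
   emitted with op0 and op1 exchanged.  */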
26316 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
26318 static rtx
26319 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
26320 rtx target)
26322 rtx pat;
26323 tree arg0 = CALL_EXPR_ARG (exp, 0);
26324 tree arg1 = CALL_EXPR_ARG (exp, 1);
26325 rtx op0 = expand_normal (arg0);
26326 rtx op1 = expand_normal (arg1);
26327 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26328 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26329 enum rtx_code comparison = d->comparison;
26331 if (VECTOR_MODE_P (mode0))
26332 op0 = safe_vector_operand (op0, mode0);
26333 if (VECTOR_MODE_P (mode1))
26334 op1 = safe_vector_operand (op1, mode1);
26336 /* Swap operands if we have a comparison that isn't available in
26337 hardware. */
26338 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
26340 rtx tmp = op1;
26341 op1 = op0;
26342 op0 = tmp;
26345 target = gen_reg_rtx (SImode);
26346 emit_move_insn (target, const0_rtx);
26347 target = gen_rtx_SUBREG (QImode, target, 0);
26349 if ((optimize && !register_operand (op0, mode0))
26350 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26351 op0 = copy_to_mode_reg (mode0, op0);
26352 if ((optimize && !register_operand (op1, mode1))
26353 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26354 op1 = copy_to_mode_reg (mode1, op1);
26356 pat = GEN_FCN (d->icode) (op0, op1);
26357 if (! pat)
26358 return 0;
26359 emit_insn (pat);
26360 emit_insn (gen_rtx_SET (VOIDmode,
26361 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26362 gen_rtx_fmt_ee (comparison, QImode,
26363 SET_DEST (pat),
26364 const0_rtx)));
26366 return SUBREG_REG (target);
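/* Example: _mm_comieq_ss (a, b) maps to __builtin_ia32_comieq (a, b)
   (assuming the usual xmmintrin.h definitions).  The comi pattern only
   sets the flags; the code above then latches the requested condition
   into the low byte of a fresh SImode pseudo and returns that 0/1
   value.  */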
26369 /* Subroutine of ix86_expand_args_builtin to take care of round insns. */
26371 static rtx
26372 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
26373 rtx target)
26375 rtx pat;
26376 tree arg0 = CALL_EXPR_ARG (exp, 0);
26377 rtx op1, op0 = expand_normal (arg0);
26378 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
26379 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
26381 if (optimize || target == 0
26382 || GET_MODE (target) != tmode
26383 || !insn_data[d->icode].operand[0].predicate (target, tmode))
26384 target = gen_reg_rtx (tmode);
26386 if (VECTOR_MODE_P (mode0))
26387 op0 = safe_vector_operand (op0, mode0);
26389 if ((optimize && !register_operand (op0, mode0))
26390 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26391 op0 = copy_to_mode_reg (mode0, op0);
26393 op1 = GEN_INT (d->comparison);
26395 pat = GEN_FCN (d->icode) (target, op0, op1);
26396 if (! pat)
26397 return 0;
26398 emit_insn (pat);
26399 return target;
26402 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
26404 static rtx
26405 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
26406 rtx target)
26408 rtx pat;
26409 tree arg0 = CALL_EXPR_ARG (exp, 0);
26410 tree arg1 = CALL_EXPR_ARG (exp, 1);
26411 rtx op0 = expand_normal (arg0);
26412 rtx op1 = expand_normal (arg1);
26413 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26414 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26415 enum rtx_code comparison = d->comparison;
26417 if (VECTOR_MODE_P (mode0))
26418 op0 = safe_vector_operand (op0, mode0);
26419 if (VECTOR_MODE_P (mode1))
26420 op1 = safe_vector_operand (op1, mode1);
26422 target = gen_reg_rtx (SImode);
26423 emit_move_insn (target, const0_rtx);
26424 target = gen_rtx_SUBREG (QImode, target, 0);
26426 if ((optimize && !register_operand (op0, mode0))
26427 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26428 op0 = copy_to_mode_reg (mode0, op0);
26429 if ((optimize && !register_operand (op1, mode1))
26430 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26431 op1 = copy_to_mode_reg (mode1, op1);
26433 pat = GEN_FCN (d->icode) (op0, op1);
26434 if (! pat)
26435 return 0;
26436 emit_insn (pat);
26437 emit_insn (gen_rtx_SET (VOIDmode,
26438 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26439 gen_rtx_fmt_ee (comparison, QImode,
26440 SET_DEST (pat),
26441 const0_rtx)));
26443 return SUBREG_REG (target);
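/* Example, assuming the smmintrin.h mapping:

       _mm_testz_si128 (a, b)  ==>  __builtin_ia32_ptestz128 (a, b)

   The ptest pattern sets the flags; D->COMPARISON (EQ for the testz form)
   selects which flag condition is turned into the 0/1 result above.  */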
26446 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
26448 static rtx
26449 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
26450 tree exp, rtx target)
26452 rtx pat;
26453 tree arg0 = CALL_EXPR_ARG (exp, 0);
26454 tree arg1 = CALL_EXPR_ARG (exp, 1);
26455 tree arg2 = CALL_EXPR_ARG (exp, 2);
26456 tree arg3 = CALL_EXPR_ARG (exp, 3);
26457 tree arg4 = CALL_EXPR_ARG (exp, 4);
26458 rtx scratch0, scratch1;
26459 rtx op0 = expand_normal (arg0);
26460 rtx op1 = expand_normal (arg1);
26461 rtx op2 = expand_normal (arg2);
26462 rtx op3 = expand_normal (arg3);
26463 rtx op4 = expand_normal (arg4);
26464 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
26466 tmode0 = insn_data[d->icode].operand[0].mode;
26467 tmode1 = insn_data[d->icode].operand[1].mode;
26468 modev2 = insn_data[d->icode].operand[2].mode;
26469 modei3 = insn_data[d->icode].operand[3].mode;
26470 modev4 = insn_data[d->icode].operand[4].mode;
26471 modei5 = insn_data[d->icode].operand[5].mode;
26472 modeimm = insn_data[d->icode].operand[6].mode;
26474 if (VECTOR_MODE_P (modev2))
26475 op0 = safe_vector_operand (op0, modev2);
26476 if (VECTOR_MODE_P (modev4))
26477 op2 = safe_vector_operand (op2, modev4);
26479 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26480 op0 = copy_to_mode_reg (modev2, op0);
26481 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
26482 op1 = copy_to_mode_reg (modei3, op1);
26483 if ((optimize && !register_operand (op2, modev4))
26484 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
26485 op2 = copy_to_mode_reg (modev4, op2);
26486 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
26487 op3 = copy_to_mode_reg (modei5, op3);
26489 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
26491 error ("the fifth argument must be a 8-bit immediate");
26492 return const0_rtx;
26495 if (d->code == IX86_BUILTIN_PCMPESTRI128)
26497 if (optimize || !target
26498 || GET_MODE (target) != tmode0
26499 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26500 target = gen_reg_rtx (tmode0);
26502 scratch1 = gen_reg_rtx (tmode1);
26504 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
26506 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
26508 if (optimize || !target
26509 || GET_MODE (target) != tmode1
26510 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26511 target = gen_reg_rtx (tmode1);
26513 scratch0 = gen_reg_rtx (tmode0);
26515 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
26517 else
26519 gcc_assert (d->flag);
26521 scratch0 = gen_reg_rtx (tmode0);
26522 scratch1 = gen_reg_rtx (tmode1);
26524 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
26527 if (! pat)
26528 return 0;
26530 emit_insn (pat);
26532 if (d->flag)
26534 target = gen_reg_rtx (SImode);
26535 emit_move_insn (target, const0_rtx);
26536 target = gen_rtx_SUBREG (QImode, target, 0);
26538 emit_insn
26539 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26540 gen_rtx_fmt_ee (EQ, QImode,
26541 gen_rtx_REG ((enum machine_mode) d->flag,
26542 FLAGS_REG),
26543 const0_rtx)));
26544 return SUBREG_REG (target);
26546 else
26547 return target;
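/* Example, assuming the smmintrin.h mapping:

       _mm_cmpestri (a, la, b, lb, M)  ==>  __builtin_ia32_pcmpestri128

   with M a compile-time 8-bit immediate, as enforced above.  The ...estrm
   builtin returns the XMM mask instead, and the flag-returning variants
   (_mm_cmpestra and friends) use D->FLAG to read a single bit out of
   FLAGS_REG.  */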
26551 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
26553 static rtx
26554 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
26555 tree exp, rtx target)
26557 rtx pat;
26558 tree arg0 = CALL_EXPR_ARG (exp, 0);
26559 tree arg1 = CALL_EXPR_ARG (exp, 1);
26560 tree arg2 = CALL_EXPR_ARG (exp, 2);
26561 rtx scratch0, scratch1;
26562 rtx op0 = expand_normal (arg0);
26563 rtx op1 = expand_normal (arg1);
26564 rtx op2 = expand_normal (arg2);
26565 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
26567 tmode0 = insn_data[d->icode].operand[0].mode;
26568 tmode1 = insn_data[d->icode].operand[1].mode;
26569 modev2 = insn_data[d->icode].operand[2].mode;
26570 modev3 = insn_data[d->icode].operand[3].mode;
26571 modeimm = insn_data[d->icode].operand[4].mode;
26573 if (VECTOR_MODE_P (modev2))
26574 op0 = safe_vector_operand (op0, modev2);
26575 if (VECTOR_MODE_P (modev3))
26576 op1 = safe_vector_operand (op1, modev3);
26578 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26579 op0 = copy_to_mode_reg (modev2, op0);
26580 if ((optimize && !register_operand (op1, modev3))
26581 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
26582 op1 = copy_to_mode_reg (modev3, op1);
26584 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
26586 error ("the third argument must be a 8-bit immediate");
26587 return const0_rtx;
26590 if (d->code == IX86_BUILTIN_PCMPISTRI128)
26592 if (optimize || !target
26593 || GET_MODE (target) != tmode0
26594 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26595 target = gen_reg_rtx (tmode0);
26597 scratch1 = gen_reg_rtx (tmode1);
26599 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
26601 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
26603 if (optimize || !target
26604 || GET_MODE (target) != tmode1
26605 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26606 target = gen_reg_rtx (tmode1);
26608 scratch0 = gen_reg_rtx (tmode0);
26610 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
26612 else
26614 gcc_assert (d->flag);
26616 scratch0 = gen_reg_rtx (tmode0);
26617 scratch1 = gen_reg_rtx (tmode1);
26619 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
26622 if (! pat)
26623 return 0;
26625 emit_insn (pat);
26627 if (d->flag)
26629 target = gen_reg_rtx (SImode);
26630 emit_move_insn (target, const0_rtx);
26631 target = gen_rtx_SUBREG (QImode, target, 0);
26633 emit_insn
26634 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26635 gen_rtx_fmt_ee (EQ, QImode,
26636 gen_rtx_REG ((enum machine_mode) d->flag,
26637 FLAGS_REG),
26638 const0_rtx)));
26639 return SUBREG_REG (target);
26641 else
26642 return target;
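/* Example: _mm_cmpistri (a, b, M) maps to __builtin_ia32_pcmpistri128 and
   _mm_cmpistrm (a, b, M) to __builtin_ia32_pcmpistrm128 (assuming the
   smmintrin.h definitions), with M again required to be an 8-bit
   compile-time immediate.  */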
26645 /* Subroutine of ix86_expand_builtin to take care of insns with
26646 variable number of operands. */
26648 static rtx
26649 ix86_expand_args_builtin (const struct builtin_description *d,
26650 tree exp, rtx target)
26652 rtx pat, real_target;
26653 unsigned int i, nargs;
26654 unsigned int nargs_constant = 0;
26655 int num_memory = 0;
26656 struct
26658 rtx op;
26659 enum machine_mode mode;
26660 } args[4];
26661 bool last_arg_count = false;
26662 enum insn_code icode = d->icode;
26663 const struct insn_data_d *insn_p = &insn_data[icode];
26664 enum machine_mode tmode = insn_p->operand[0].mode;
26665 enum machine_mode rmode = VOIDmode;
26666 bool swap = false;
26667 enum rtx_code comparison = d->comparison;
26669 switch ((enum ix86_builtin_func_type) d->flag)
26671 case V2DF_FTYPE_V2DF_ROUND:
26672 case V4DF_FTYPE_V4DF_ROUND:
26673 case V4SF_FTYPE_V4SF_ROUND:
26674 case V8SF_FTYPE_V8SF_ROUND:
26675 return ix86_expand_sse_round (d, exp, target);
26676 case INT_FTYPE_V8SF_V8SF_PTEST:
26677 case INT_FTYPE_V4DI_V4DI_PTEST:
26678 case INT_FTYPE_V4DF_V4DF_PTEST:
26679 case INT_FTYPE_V4SF_V4SF_PTEST:
26680 case INT_FTYPE_V2DI_V2DI_PTEST:
26681 case INT_FTYPE_V2DF_V2DF_PTEST:
26682 return ix86_expand_sse_ptest (d, exp, target);
26683 case FLOAT128_FTYPE_FLOAT128:
26684 case FLOAT_FTYPE_FLOAT:
26685 case INT_FTYPE_INT:
26686 case UINT64_FTYPE_INT:
26687 case UINT16_FTYPE_UINT16:
26688 case INT64_FTYPE_INT64:
26689 case INT64_FTYPE_V4SF:
26690 case INT64_FTYPE_V2DF:
26691 case INT_FTYPE_V16QI:
26692 case INT_FTYPE_V8QI:
26693 case INT_FTYPE_V8SF:
26694 case INT_FTYPE_V4DF:
26695 case INT_FTYPE_V4SF:
26696 case INT_FTYPE_V2DF:
26697 case V16QI_FTYPE_V16QI:
26698 case V8SI_FTYPE_V8SF:
26699 case V8SI_FTYPE_V4SI:
26700 case V8HI_FTYPE_V8HI:
26701 case V8HI_FTYPE_V16QI:
26702 case V8QI_FTYPE_V8QI:
26703 case V8SF_FTYPE_V8SF:
26704 case V8SF_FTYPE_V8SI:
26705 case V8SF_FTYPE_V4SF:
26706 case V8SF_FTYPE_V8HI:
26707 case V4SI_FTYPE_V4SI:
26708 case V4SI_FTYPE_V16QI:
26709 case V4SI_FTYPE_V4SF:
26710 case V4SI_FTYPE_V8SI:
26711 case V4SI_FTYPE_V8HI:
26712 case V4SI_FTYPE_V4DF:
26713 case V4SI_FTYPE_V2DF:
26714 case V4HI_FTYPE_V4HI:
26715 case V4DF_FTYPE_V4DF:
26716 case V4DF_FTYPE_V4SI:
26717 case V4DF_FTYPE_V4SF:
26718 case V4DF_FTYPE_V2DF:
26719 case V4SF_FTYPE_V4SF:
26720 case V4SF_FTYPE_V4SI:
26721 case V4SF_FTYPE_V8SF:
26722 case V4SF_FTYPE_V4DF:
26723 case V4SF_FTYPE_V8HI:
26724 case V4SF_FTYPE_V2DF:
26725 case V2DI_FTYPE_V2DI:
26726 case V2DI_FTYPE_V16QI:
26727 case V2DI_FTYPE_V8HI:
26728 case V2DI_FTYPE_V4SI:
26729 case V2DF_FTYPE_V2DF:
26730 case V2DF_FTYPE_V4SI:
26731 case V2DF_FTYPE_V4DF:
26732 case V2DF_FTYPE_V4SF:
26733 case V2DF_FTYPE_V2SI:
26734 case V2SI_FTYPE_V2SI:
26735 case V2SI_FTYPE_V4SF:
26736 case V2SI_FTYPE_V2SF:
26737 case V2SI_FTYPE_V2DF:
26738 case V2SF_FTYPE_V2SF:
26739 case V2SF_FTYPE_V2SI:
26740 nargs = 1;
26741 break;
26742 case V4SF_FTYPE_V4SF_VEC_MERGE:
26743 case V2DF_FTYPE_V2DF_VEC_MERGE:
26744 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
26745 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
26746 case V16QI_FTYPE_V16QI_V16QI:
26747 case V16QI_FTYPE_V8HI_V8HI:
26748 case V8QI_FTYPE_V8QI_V8QI:
26749 case V8QI_FTYPE_V4HI_V4HI:
26750 case V8HI_FTYPE_V8HI_V8HI:
26751 case V8HI_FTYPE_V16QI_V16QI:
26752 case V8HI_FTYPE_V4SI_V4SI:
26753 case V8SF_FTYPE_V8SF_V8SF:
26754 case V8SF_FTYPE_V8SF_V8SI:
26755 case V4SI_FTYPE_V4SI_V4SI:
26756 case V4SI_FTYPE_V8HI_V8HI:
26757 case V4SI_FTYPE_V4SF_V4SF:
26758 case V4SI_FTYPE_V2DF_V2DF:
26759 case V4HI_FTYPE_V4HI_V4HI:
26760 case V4HI_FTYPE_V8QI_V8QI:
26761 case V4HI_FTYPE_V2SI_V2SI:
26762 case V4DF_FTYPE_V4DF_V4DF:
26763 case V4DF_FTYPE_V4DF_V4DI:
26764 case V4SF_FTYPE_V4SF_V4SF:
26765 case V4SF_FTYPE_V4SF_V4SI:
26766 case V4SF_FTYPE_V4SF_V2SI:
26767 case V4SF_FTYPE_V4SF_V2DF:
26768 case V4SF_FTYPE_V4SF_DI:
26769 case V4SF_FTYPE_V4SF_SI:
26770 case V2DI_FTYPE_V2DI_V2DI:
26771 case V2DI_FTYPE_V16QI_V16QI:
26772 case V2DI_FTYPE_V4SI_V4SI:
26773 case V2DI_FTYPE_V2DI_V16QI:
26774 case V2DI_FTYPE_V2DF_V2DF:
26775 case V2SI_FTYPE_V2SI_V2SI:
26776 case V2SI_FTYPE_V4HI_V4HI:
26777 case V2SI_FTYPE_V2SF_V2SF:
26778 case V2DF_FTYPE_V2DF_V2DF:
26779 case V2DF_FTYPE_V2DF_V4SF:
26780 case V2DF_FTYPE_V2DF_V2DI:
26781 case V2DF_FTYPE_V2DF_DI:
26782 case V2DF_FTYPE_V2DF_SI:
26783 case V2SF_FTYPE_V2SF_V2SF:
26784 case V1DI_FTYPE_V1DI_V1DI:
26785 case V1DI_FTYPE_V8QI_V8QI:
26786 case V1DI_FTYPE_V2SI_V2SI:
26787 if (comparison == UNKNOWN)
26788 return ix86_expand_binop_builtin (icode, exp, target);
26789 nargs = 2;
26790 break;
26791 case V4SF_FTYPE_V4SF_V4SF_SWAP:
26792 case V2DF_FTYPE_V2DF_V2DF_SWAP:
26793 gcc_assert (comparison != UNKNOWN);
26794 nargs = 2;
26795 swap = true;
26796 break;
26797 case V8HI_FTYPE_V8HI_V8HI_COUNT:
26798 case V8HI_FTYPE_V8HI_SI_COUNT:
26799 case V4SI_FTYPE_V4SI_V4SI_COUNT:
26800 case V4SI_FTYPE_V4SI_SI_COUNT:
26801 case V4HI_FTYPE_V4HI_V4HI_COUNT:
26802 case V4HI_FTYPE_V4HI_SI_COUNT:
26803 case V2DI_FTYPE_V2DI_V2DI_COUNT:
26804 case V2DI_FTYPE_V2DI_SI_COUNT:
26805 case V2SI_FTYPE_V2SI_V2SI_COUNT:
26806 case V2SI_FTYPE_V2SI_SI_COUNT:
26807 case V1DI_FTYPE_V1DI_V1DI_COUNT:
26808 case V1DI_FTYPE_V1DI_SI_COUNT:
26809 nargs = 2;
26810 last_arg_count = true;
26811 break;
26812 case UINT64_FTYPE_UINT64_UINT64:
26813 case UINT_FTYPE_UINT_UINT:
26814 case UINT_FTYPE_UINT_USHORT:
26815 case UINT_FTYPE_UINT_UCHAR:
26816 case UINT16_FTYPE_UINT16_INT:
26817 case UINT8_FTYPE_UINT8_INT:
26818 nargs = 2;
26819 break;
26820 case V2DI_FTYPE_V2DI_INT_CONVERT:
26821 nargs = 2;
26822 rmode = V1TImode;
26823 nargs_constant = 1;
26824 break;
26825 case V8HI_FTYPE_V8HI_INT:
26826 case V8HI_FTYPE_V8SF_INT:
26827 case V8HI_FTYPE_V4SF_INT:
26828 case V8SF_FTYPE_V8SF_INT:
26829 case V4SI_FTYPE_V4SI_INT:
26830 case V4SI_FTYPE_V8SI_INT:
26831 case V4HI_FTYPE_V4HI_INT:
26832 case V4DF_FTYPE_V4DF_INT:
26833 case V4SF_FTYPE_V4SF_INT:
26834 case V4SF_FTYPE_V8SF_INT:
26835 case V2DI_FTYPE_V2DI_INT:
26836 case V2DF_FTYPE_V2DF_INT:
26837 case V2DF_FTYPE_V4DF_INT:
26838 nargs = 2;
26839 nargs_constant = 1;
26840 break;
26841 case V16QI_FTYPE_V16QI_V16QI_V16QI:
26842 case V8SF_FTYPE_V8SF_V8SF_V8SF:
26843 case V4DF_FTYPE_V4DF_V4DF_V4DF:
26844 case V4SF_FTYPE_V4SF_V4SF_V4SF:
26845 case V2DF_FTYPE_V2DF_V2DF_V2DF:
26846 nargs = 3;
26847 break;
26848 case V16QI_FTYPE_V16QI_V16QI_INT:
26849 case V8HI_FTYPE_V8HI_V8HI_INT:
26850 case V8SI_FTYPE_V8SI_V8SI_INT:
26851 case V8SI_FTYPE_V8SI_V4SI_INT:
26852 case V8SF_FTYPE_V8SF_V8SF_INT:
26853 case V8SF_FTYPE_V8SF_V4SF_INT:
26854 case V4SI_FTYPE_V4SI_V4SI_INT:
26855 case V4DF_FTYPE_V4DF_V4DF_INT:
26856 case V4DF_FTYPE_V4DF_V2DF_INT:
26857 case V4SF_FTYPE_V4SF_V4SF_INT:
26858 case V2DI_FTYPE_V2DI_V2DI_INT:
26859 case V2DF_FTYPE_V2DF_V2DF_INT:
26860 nargs = 3;
26861 nargs_constant = 1;
26862 break;
26863 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
26864 nargs = 3;
26865 rmode = V2DImode;
26866 nargs_constant = 1;
26867 break;
26868 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
26869 nargs = 3;
26870 rmode = DImode;
26871 nargs_constant = 1;
26872 break;
26873 case V2DI_FTYPE_V2DI_UINT_UINT:
26874 nargs = 3;
26875 nargs_constant = 2;
26876 break;
26877 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
26878 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
26879 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
26880 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
26881 nargs = 4;
26882 nargs_constant = 1;
26883 break;
26884 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
26885 nargs = 4;
26886 nargs_constant = 2;
26887 break;
26888 default:
26889 gcc_unreachable ();
26892 gcc_assert (nargs <= ARRAY_SIZE (args));
26894 if (comparison != UNKNOWN)
26896 gcc_assert (nargs == 2);
26897 return ix86_expand_sse_compare (d, exp, target, swap);
26900 if (rmode == VOIDmode || rmode == tmode)
26902 if (optimize
26903 || target == 0
26904 || GET_MODE (target) != tmode
26905 || !insn_p->operand[0].predicate (target, tmode))
26906 target = gen_reg_rtx (tmode);
26907 real_target = target;
26909 else
26911 target = gen_reg_rtx (rmode);
26912 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
26915 for (i = 0; i < nargs; i++)
26917 tree arg = CALL_EXPR_ARG (exp, i);
26918 rtx op = expand_normal (arg);
26919 enum machine_mode mode = insn_p->operand[i + 1].mode;
26920 bool match = insn_p->operand[i + 1].predicate (op, mode);
26922 if (last_arg_count && (i + 1) == nargs)
26924 /* SIMD shift insns take either an 8-bit immediate or a
26925 register as the count. But the builtin functions take an int as
26926 the count. If the count doesn't match, we put it in a register. */
26927 if (!match)
26929 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
26930 if (!insn_p->operand[i + 1].predicate (op, mode))
26931 op = copy_to_reg (op);
26934 else if ((nargs - i) <= nargs_constant)
26936 if (!match)
26937 switch (icode)
26939 case CODE_FOR_sse4_1_roundpd:
26940 case CODE_FOR_sse4_1_roundps:
26941 case CODE_FOR_sse4_1_roundsd:
26942 case CODE_FOR_sse4_1_roundss:
26943 case CODE_FOR_sse4_1_blendps:
26944 case CODE_FOR_avx_blendpd256:
26945 case CODE_FOR_avx_vpermilv4df:
26946 case CODE_FOR_avx_roundpd256:
26947 case CODE_FOR_avx_roundps256:
26948 error ("the last argument must be a 4-bit immediate");
26949 return const0_rtx;
26951 case CODE_FOR_sse4_1_blendpd:
26952 case CODE_FOR_avx_vpermilv2df:
26953 case CODE_FOR_xop_vpermil2v2df3:
26954 case CODE_FOR_xop_vpermil2v4sf3:
26955 case CODE_FOR_xop_vpermil2v4df3:
26956 case CODE_FOR_xop_vpermil2v8sf3:
26957 error ("the last argument must be a 2-bit immediate");
26958 return const0_rtx;
26960 case CODE_FOR_avx_vextractf128v4df:
26961 case CODE_FOR_avx_vextractf128v8sf:
26962 case CODE_FOR_avx_vextractf128v8si:
26963 case CODE_FOR_avx_vinsertf128v4df:
26964 case CODE_FOR_avx_vinsertf128v8sf:
26965 case CODE_FOR_avx_vinsertf128v8si:
26966 error ("the last argument must be a 1-bit immediate");
26967 return const0_rtx;
26969 case CODE_FOR_avx_vmcmpv2df3:
26970 case CODE_FOR_avx_vmcmpv4sf3:
26971 case CODE_FOR_avx_cmpv2df3:
26972 case CODE_FOR_avx_cmpv4sf3:
26973 case CODE_FOR_avx_cmpv4df3:
26974 case CODE_FOR_avx_cmpv8sf3:
26975 error ("the last argument must be a 5-bit immediate");
26976 return const0_rtx;
26978 default:
26979 switch (nargs_constant)
26981 case 2:
26982 if ((nargs - i) == nargs_constant)
26984 error ("the next to last argument must be an 8-bit immediate");
26985 break;
26987 case 1:
26988 error ("the last argument must be an 8-bit immediate");
26989 break;
26990 default:
26991 gcc_unreachable ();
26993 return const0_rtx;
26996 else
26998 if (VECTOR_MODE_P (mode))
26999 op = safe_vector_operand (op, mode);
27001 /* If we aren't optimizing, only allow one memory operand to
27002 be generated. */
27003 if (memory_operand (op, mode))
27004 num_memory++;
27006 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
27008 if (optimize || !match || num_memory > 1)
27009 op = copy_to_mode_reg (mode, op);
27011 else
27013 op = copy_to_reg (op);
27014 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
27018 args[i].op = op;
27019 args[i].mode = mode;
27022 switch (nargs)
27024 case 1:
27025 pat = GEN_FCN (icode) (real_target, args[0].op);
27026 break;
27027 case 2:
27028 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
27029 break;
27030 case 3:
27031 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
27032 args[2].op);
27033 break;
27034 case 4:
27035 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
27036 args[2].op, args[3].op);
27037 break;
27038 default:
27039 gcc_unreachable ();
27042 if (! pat)
27043 return 0;
27045 emit_insn (pat);
27046 return target;
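/* Sketch of the immediate checking above, assuming the smmintrin.h
   mapping for _mm_round_sd:

       __m128d r = __builtin_ia32_roundsd (a, b, 42);

   CODE_FOR_sse4_1_roundsd only accepts a 4-bit rounding immediate, so the
   out-of-range constant is rejected with the 4-bit diagnostic; builtins
   not listed in the switch fall back to the generic 8-bit message.  */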
27049 /* Subroutine of ix86_expand_builtin to take care of special insns
27050 with variable number of operands. */
27052 static rtx
27053 ix86_expand_special_args_builtin (const struct builtin_description *d,
27054 tree exp, rtx target)
27056 tree arg;
27057 rtx pat, op;
27058 unsigned int i, nargs, arg_adjust, memory;
27059 struct
27061 rtx op;
27062 enum machine_mode mode;
27063 } args[3];
27064 enum insn_code icode = d->icode;
27065 bool last_arg_constant = false;
27066 const struct insn_data_d *insn_p = &insn_data[icode];
27067 enum machine_mode tmode = insn_p->operand[0].mode;
27068 enum { load, store } klass;
27070 switch ((enum ix86_builtin_func_type) d->flag)
27072 case VOID_FTYPE_VOID:
27073 if (icode == CODE_FOR_avx_vzeroupper)
27074 target = GEN_INT (vzeroupper_intrinsic);
27075 emit_insn (GEN_FCN (icode) (target));
27076 return 0;
27077 case VOID_FTYPE_UINT64:
27078 case VOID_FTYPE_UNSIGNED:
27079 nargs = 0;
27080 klass = store;
27081 memory = 0;
27082 break;
27084 case UINT64_FTYPE_VOID:
27085 case UNSIGNED_FTYPE_VOID:
27086 nargs = 0;
27087 klass = load;
27088 memory = 0;
27089 break;
27090 case UINT64_FTYPE_PUNSIGNED:
27091 case V2DI_FTYPE_PV2DI:
27092 case V32QI_FTYPE_PCCHAR:
27093 case V16QI_FTYPE_PCCHAR:
27094 case V8SF_FTYPE_PCV4SF:
27095 case V8SF_FTYPE_PCFLOAT:
27096 case V4SF_FTYPE_PCFLOAT:
27097 case V4DF_FTYPE_PCV2DF:
27098 case V4DF_FTYPE_PCDOUBLE:
27099 case V2DF_FTYPE_PCDOUBLE:
27100 case VOID_FTYPE_PVOID:
27101 nargs = 1;
27102 klass = load;
27103 memory = 0;
27104 break;
27105 case VOID_FTYPE_PV2SF_V4SF:
27106 case VOID_FTYPE_PV4DI_V4DI:
27107 case VOID_FTYPE_PV2DI_V2DI:
27108 case VOID_FTYPE_PCHAR_V32QI:
27109 case VOID_FTYPE_PCHAR_V16QI:
27110 case VOID_FTYPE_PFLOAT_V8SF:
27111 case VOID_FTYPE_PFLOAT_V4SF:
27112 case VOID_FTYPE_PDOUBLE_V4DF:
27113 case VOID_FTYPE_PDOUBLE_V2DF:
27114 case VOID_FTYPE_PULONGLONG_ULONGLONG:
27115 case VOID_FTYPE_PINT_INT:
27116 nargs = 1;
27117 klass = store;
27118 /* Reserve memory operand for target. */
27119 memory = ARRAY_SIZE (args);
27120 break;
27121 case V4SF_FTYPE_V4SF_PCV2SF:
27122 case V2DF_FTYPE_V2DF_PCDOUBLE:
27123 nargs = 2;
27124 klass = load;
27125 memory = 1;
27126 break;
27127 case V8SF_FTYPE_PCV8SF_V8SI:
27128 case V4DF_FTYPE_PCV4DF_V4DI:
27129 case V4SF_FTYPE_PCV4SF_V4SI:
27130 case V2DF_FTYPE_PCV2DF_V2DI:
27131 nargs = 2;
27132 klass = load;
27133 memory = 0;
27134 break;
27135 case VOID_FTYPE_PV8SF_V8SI_V8SF:
27136 case VOID_FTYPE_PV4DF_V4DI_V4DF:
27137 case VOID_FTYPE_PV4SF_V4SI_V4SF:
27138 case VOID_FTYPE_PV2DF_V2DI_V2DF:
27139 nargs = 2;
27140 klass = store;
27141 /* Reserve memory operand for target. */
27142 memory = ARRAY_SIZE (args);
27143 break;
27144 case VOID_FTYPE_UINT_UINT_UINT:
27145 case VOID_FTYPE_UINT64_UINT_UINT:
27146 case UCHAR_FTYPE_UINT_UINT_UINT:
27147 case UCHAR_FTYPE_UINT64_UINT_UINT:
27148 nargs = 3;
27149 klass = load;
27150 memory = ARRAY_SIZE (args);
27151 last_arg_constant = true;
27152 break;
27153 default:
27154 gcc_unreachable ();
27157 gcc_assert (nargs <= ARRAY_SIZE (args));
27159 if (klass == store)
27161 arg = CALL_EXPR_ARG (exp, 0);
27162 op = expand_normal (arg);
27163 gcc_assert (target == 0);
27164 if (memory)
27165 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
27166 else
27167 target = force_reg (tmode, op);
27168 arg_adjust = 1;
27170 else
27172 arg_adjust = 0;
27173 if (optimize
27174 || target == 0
27175 || GET_MODE (target) != tmode
27176 || !insn_p->operand[0].predicate (target, tmode))
27177 target = gen_reg_rtx (tmode);
27180 for (i = 0; i < nargs; i++)
27182 enum machine_mode mode = insn_p->operand[i + 1].mode;
27183 bool match;
27185 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
27186 op = expand_normal (arg);
27187 match = insn_p->operand[i + 1].predicate (op, mode);
27189 if (last_arg_constant && (i + 1) == nargs)
27191 if (!match)
27193 if (icode == CODE_FOR_lwp_lwpvalsi3
27194 || icode == CODE_FOR_lwp_lwpinssi3
27195 || icode == CODE_FOR_lwp_lwpvaldi3
27196 || icode == CODE_FOR_lwp_lwpinsdi3)
27197 error ("the last argument must be a 32-bit immediate");
27198 else
27199 error ("the last argument must be an 8-bit immediate");
27200 return const0_rtx;
27203 else
27205 if (i == memory)
27207 /* This must be the memory operand. */
27208 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
27209 gcc_assert (GET_MODE (op) == mode
27210 || GET_MODE (op) == VOIDmode);
27212 else
27214 /* This must be a register. */
27215 if (VECTOR_MODE_P (mode))
27216 op = safe_vector_operand (op, mode);
27218 gcc_assert (GET_MODE (op) == mode
27219 || GET_MODE (op) == VOIDmode);
27220 op = copy_to_mode_reg (mode, op);
27224 args[i].op = op;
27225 args[i].mode = mode;
27228 switch (nargs)
27230 case 0:
27231 pat = GEN_FCN (icode) (target);
27232 break;
27233 case 1:
27234 pat = GEN_FCN (icode) (target, args[0].op);
27235 break;
27236 case 2:
27237 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
27238 break;
27239 case 3:
27240 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
27241 break;
27242 default:
27243 gcc_unreachable ();
27246 if (! pat)
27247 return 0;
27248 emit_insn (pat);
27249 return klass == store ? 0 : target;
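/* Example of the load/store split above, assuming the emmintrin.h
   mapping: __builtin_ia32_loadupd (V2DF_FTYPE_PCDOUBLE, behind
   _mm_loadu_pd) is a "load" whose pointer argument becomes the MEM source
   operand, while __builtin_ia32_storeupd (VOID_FTYPE_PDOUBLE_V2DF, behind
   _mm_storeu_pd) is a "store" whose pointer argument becomes the MEM
   target and whose value operand is forced into a register.  */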
27252 /* Return the integer constant in ARG. Constrain it to be in the range
27253 of the subparts of VEC_TYPE; issue an error if not. */
27255 static int
27256 get_element_number (tree vec_type, tree arg)
27258 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
27260 if (!host_integerp (arg, 1)
27261 || (elt = tree_low_cst (arg, 1), elt > max))
27263 error ("selector must be an integer constant in the range 0..%wi", max);
27264 return 0;
27267 return elt;
27270 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27271 ix86_expand_vector_init. We DO have language-level syntax for this, in
27272 the form of (type){ init-list }. Except that since we can't place emms
27273 instructions from inside the compiler, we can't allow the use of MMX
27274 registers unless the user explicitly asks for it. So we do *not* define
27275 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
27276 we have builtins invoked by mmintrin.h that give us license to emit
27277 these sorts of instructions. */
27279 static rtx
27280 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
27282 enum machine_mode tmode = TYPE_MODE (type);
27283 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
27284 int i, n_elt = GET_MODE_NUNITS (tmode);
27285 rtvec v = rtvec_alloc (n_elt);
27287 gcc_assert (VECTOR_MODE_P (tmode));
27288 gcc_assert (call_expr_nargs (exp) == n_elt);
27290 for (i = 0; i < n_elt; ++i)
27292 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
27293 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
27296 if (!target || !register_operand (target, tmode))
27297 target = gen_reg_rtx (tmode);
27299 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
27300 return target;
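/* Example, assuming the mmintrin.h mapping: _mm_set_pi32 (i1, i0) expands
   to __builtin_ia32_vec_init_v2si, which lands here; each scalar argument
   is converted to the element mode and the whole vector is built with
   ix86_expand_vector_init from a PARALLEL.  */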
27303 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27304 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
27305 had a language-level syntax for referencing vector elements. */
27307 static rtx
27308 ix86_expand_vec_ext_builtin (tree exp, rtx target)
27310 enum machine_mode tmode, mode0;
27311 tree arg0, arg1;
27312 int elt;
27313 rtx op0;
27315 arg0 = CALL_EXPR_ARG (exp, 0);
27316 arg1 = CALL_EXPR_ARG (exp, 1);
27318 op0 = expand_normal (arg0);
27319 elt = get_element_number (TREE_TYPE (arg0), arg1);
27321 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27322 mode0 = TYPE_MODE (TREE_TYPE (arg0));
27323 gcc_assert (VECTOR_MODE_P (mode0));
27325 op0 = force_reg (mode0, op0);
27327 if (optimize || !target || !register_operand (target, tmode))
27328 target = gen_reg_rtx (tmode);
27330 ix86_expand_vector_extract (true, target, op0, elt);
27332 return target;
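/* Example, assuming the emmintrin.h mapping: _mm_extract_epi16 (v, N) is
   implemented with __builtin_ia32_vec_ext_v8hi (v, N); get_element_number
   verifies that N is a constant within the vector's subparts before the
   extraction is expanded.  */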
27335 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27336 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
27337 a language-level syntax for referencing vector elements. */
27339 static rtx
27340 ix86_expand_vec_set_builtin (tree exp)
27342 enum machine_mode tmode, mode1;
27343 tree arg0, arg1, arg2;
27344 int elt;
27345 rtx op0, op1, target;
27347 arg0 = CALL_EXPR_ARG (exp, 0);
27348 arg1 = CALL_EXPR_ARG (exp, 1);
27349 arg2 = CALL_EXPR_ARG (exp, 2);
27351 tmode = TYPE_MODE (TREE_TYPE (arg0));
27352 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27353 gcc_assert (VECTOR_MODE_P (tmode));
27355 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
27356 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
27357 elt = get_element_number (TREE_TYPE (arg0), arg2);
27359 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
27360 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
27362 op0 = force_reg (tmode, op0);
27363 op1 = force_reg (mode1, op1);
27365 /* OP0 is the source of these builtin functions and shouldn't be
27366 modified. Create a copy, use it, and return it as the target. */
27367 target = gen_reg_rtx (tmode);
27368 emit_move_insn (target, op0);
27369 ix86_expand_vector_set (true, target, op1, elt);
27371 return target;
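/* Example, assuming the emmintrin.h mapping: _mm_insert_epi16 (v, s, N)
   maps to __builtin_ia32_vec_set_v8hi (v, s, N).  The copy made above is
   what gets modified and returned, so the builtin never clobbers its
   first argument.  */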
27374 /* Expand an expression EXP that calls a built-in function,
27375 with result going to TARGET if that's convenient
27376 (and in mode MODE if that's convenient).
27377 SUBTARGET may be used as the target for computing one of EXP's operands.
27378 IGNORE is nonzero if the value is to be ignored. */
27380 static rtx
27381 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
27382 enum machine_mode mode ATTRIBUTE_UNUSED,
27383 int ignore ATTRIBUTE_UNUSED)
27385 const struct builtin_description *d;
27386 size_t i;
27387 enum insn_code icode;
27388 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
27389 tree arg0, arg1, arg2;
27390 rtx op0, op1, op2, pat;
27391 enum machine_mode mode0, mode1, mode2;
27392 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
27394 /* Determine whether the builtin function is available under the current ISA.
27395 Originally the builtin was not created if it wasn't applicable to the
27396 current ISA based on the command line switches. With function specific
27397 options, we need to check in the context of the function making the call
27398 whether it is supported. */
27399 if (ix86_builtins_isa[fcode].isa
27400 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
27402 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
27403 NULL, (enum fpmath_unit) 0, false);
27405 if (!opts)
27406 error ("%qE needs unknown isa option", fndecl);
27407 else
27409 gcc_assert (opts != NULL);
27410 error ("%qE needs isa option %s", fndecl, opts);
27411 free (opts);
27413 return const0_rtx;
27416 switch (fcode)
27418 case IX86_BUILTIN_MASKMOVQ:
27419 case IX86_BUILTIN_MASKMOVDQU:
27420 icode = (fcode == IX86_BUILTIN_MASKMOVQ
27421 ? CODE_FOR_mmx_maskmovq
27422 : CODE_FOR_sse2_maskmovdqu);
27423 /* Note the arg order is different from the operand order. */
27424 arg1 = CALL_EXPR_ARG (exp, 0);
27425 arg2 = CALL_EXPR_ARG (exp, 1);
27426 arg0 = CALL_EXPR_ARG (exp, 2);
27427 op0 = expand_normal (arg0);
27428 op1 = expand_normal (arg1);
27429 op2 = expand_normal (arg2);
27430 mode0 = insn_data[icode].operand[0].mode;
27431 mode1 = insn_data[icode].operand[1].mode;
27432 mode2 = insn_data[icode].operand[2].mode;
27434 op0 = force_reg (Pmode, op0);
27435 op0 = gen_rtx_MEM (mode1, op0);
27437 if (!insn_data[icode].operand[0].predicate (op0, mode0))
27438 op0 = copy_to_mode_reg (mode0, op0);
27439 if (!insn_data[icode].operand[1].predicate (op1, mode1))
27440 op1 = copy_to_mode_reg (mode1, op1);
27441 if (!insn_data[icode].operand[2].predicate (op2, mode2))
27442 op2 = copy_to_mode_reg (mode2, op2);
27443 pat = GEN_FCN (icode) (op0, op1, op2);
27444 if (! pat)
27445 return 0;
27446 emit_insn (pat);
27447 return 0;
27449 case IX86_BUILTIN_LDMXCSR:
27450 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
27451 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27452 emit_move_insn (target, op0);
27453 emit_insn (gen_sse_ldmxcsr (target));
27454 return 0;
27456 case IX86_BUILTIN_STMXCSR:
27457 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27458 emit_insn (gen_sse_stmxcsr (target));
27459 return copy_to_mode_reg (SImode, target);
27461 case IX86_BUILTIN_CLFLUSH:
27462 arg0 = CALL_EXPR_ARG (exp, 0);
27463 op0 = expand_normal (arg0);
27464 icode = CODE_FOR_sse2_clflush;
27465 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27466 op0 = copy_to_mode_reg (Pmode, op0);
27468 emit_insn (gen_sse2_clflush (op0));
27469 return 0;
27471 case IX86_BUILTIN_MONITOR:
27472 arg0 = CALL_EXPR_ARG (exp, 0);
27473 arg1 = CALL_EXPR_ARG (exp, 1);
27474 arg2 = CALL_EXPR_ARG (exp, 2);
27475 op0 = expand_normal (arg0);
27476 op1 = expand_normal (arg1);
27477 op2 = expand_normal (arg2);
27478 if (!REG_P (op0))
27479 op0 = copy_to_mode_reg (Pmode, op0);
27480 if (!REG_P (op1))
27481 op1 = copy_to_mode_reg (SImode, op1);
27482 if (!REG_P (op2))
27483 op2 = copy_to_mode_reg (SImode, op2);
27484 emit_insn (ix86_gen_monitor (op0, op1, op2));
27485 return 0;
27487 case IX86_BUILTIN_MWAIT:
27488 arg0 = CALL_EXPR_ARG (exp, 0);
27489 arg1 = CALL_EXPR_ARG (exp, 1);
27490 op0 = expand_normal (arg0);
27491 op1 = expand_normal (arg1);
27492 if (!REG_P (op0))
27493 op0 = copy_to_mode_reg (SImode, op0);
27494 if (!REG_P (op1))
27495 op1 = copy_to_mode_reg (SImode, op1);
27496 emit_insn (gen_sse3_mwait (op0, op1));
27497 return 0;
27499 case IX86_BUILTIN_VEC_INIT_V2SI:
27500 case IX86_BUILTIN_VEC_INIT_V4HI:
27501 case IX86_BUILTIN_VEC_INIT_V8QI:
27502 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
27504 case IX86_BUILTIN_VEC_EXT_V2DF:
27505 case IX86_BUILTIN_VEC_EXT_V2DI:
27506 case IX86_BUILTIN_VEC_EXT_V4SF:
27507 case IX86_BUILTIN_VEC_EXT_V4SI:
27508 case IX86_BUILTIN_VEC_EXT_V8HI:
27509 case IX86_BUILTIN_VEC_EXT_V2SI:
27510 case IX86_BUILTIN_VEC_EXT_V4HI:
27511 case IX86_BUILTIN_VEC_EXT_V16QI:
27512 return ix86_expand_vec_ext_builtin (exp, target);
27514 case IX86_BUILTIN_VEC_SET_V2DI:
27515 case IX86_BUILTIN_VEC_SET_V4SF:
27516 case IX86_BUILTIN_VEC_SET_V4SI:
27517 case IX86_BUILTIN_VEC_SET_V8HI:
27518 case IX86_BUILTIN_VEC_SET_V4HI:
27519 case IX86_BUILTIN_VEC_SET_V16QI:
27520 return ix86_expand_vec_set_builtin (exp);
27522 case IX86_BUILTIN_VEC_PERM_V2DF:
27523 case IX86_BUILTIN_VEC_PERM_V4SF:
27524 case IX86_BUILTIN_VEC_PERM_V2DI:
27525 case IX86_BUILTIN_VEC_PERM_V4SI:
27526 case IX86_BUILTIN_VEC_PERM_V8HI:
27527 case IX86_BUILTIN_VEC_PERM_V16QI:
27528 case IX86_BUILTIN_VEC_PERM_V2DI_U:
27529 case IX86_BUILTIN_VEC_PERM_V4SI_U:
27530 case IX86_BUILTIN_VEC_PERM_V8HI_U:
27531 case IX86_BUILTIN_VEC_PERM_V16QI_U:
27532 case IX86_BUILTIN_VEC_PERM_V4DF:
27533 case IX86_BUILTIN_VEC_PERM_V8SF:
27534 return ix86_expand_vec_perm_builtin (exp);
27536 case IX86_BUILTIN_INFQ:
27537 case IX86_BUILTIN_HUGE_VALQ:
27539 REAL_VALUE_TYPE inf;
27540 rtx tmp;
27542 real_inf (&inf);
27543 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
27545 tmp = validize_mem (force_const_mem (mode, tmp));
27547 if (target == 0)
27548 target = gen_reg_rtx (mode);
27550 emit_move_insn (target, tmp);
27551 return target;
27554 case IX86_BUILTIN_LLWPCB:
27555 arg0 = CALL_EXPR_ARG (exp, 0);
27556 op0 = expand_normal (arg0);
27557 icode = CODE_FOR_lwp_llwpcb;
27558 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27559 op0 = copy_to_mode_reg (Pmode, op0);
27560 emit_insn (gen_lwp_llwpcb (op0));
27561 return 0;
27563 case IX86_BUILTIN_SLWPCB:
27564 icode = CODE_FOR_lwp_slwpcb;
27565 if (!target
27566 || !insn_data[icode].operand[0].predicate (target, Pmode))
27567 target = gen_reg_rtx (Pmode);
27568 emit_insn (gen_lwp_slwpcb (target));
27569 return target;
27571 case IX86_BUILTIN_BEXTRI32:
27572 case IX86_BUILTIN_BEXTRI64:
27573 arg0 = CALL_EXPR_ARG (exp, 0);
27574 arg1 = CALL_EXPR_ARG (exp, 1);
27575 op0 = expand_normal (arg0);
27576 op1 = expand_normal (arg1);
27577 icode = (fcode == IX86_BUILTIN_BEXTRI32
27578 ? CODE_FOR_tbm_bextri_si
27579 : CODE_FOR_tbm_bextri_di);
27580 if (!CONST_INT_P (op1))
27582 error ("last argument must be an immediate");
27583 return const0_rtx;
27585 else
27587 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
27588 unsigned char lsb_index = INTVAL (op1) & 0xFF;
27589 op1 = GEN_INT (length);
27590 op2 = GEN_INT (lsb_index);
27591 pat = GEN_FCN (icode) (target, op0, op1, op2);
27592 if (pat)
27593 emit_insn (pat);
27594 return target;
27597 case IX86_BUILTIN_RDRAND16_STEP:
27598 icode = CODE_FOR_rdrandhi_1;
27599 mode0 = HImode;
27600 goto rdrand_step;
27602 case IX86_BUILTIN_RDRAND32_STEP:
27603 icode = CODE_FOR_rdrandsi_1;
27604 mode0 = SImode;
27605 goto rdrand_step;
27607 case IX86_BUILTIN_RDRAND64_STEP:
27608 icode = CODE_FOR_rdranddi_1;
27609 mode0 = DImode;
27611 rdrand_step:
27612 op0 = gen_reg_rtx (mode0);
27613 emit_insn (GEN_FCN (icode) (op0));
27615 arg0 = CALL_EXPR_ARG (exp, 0);
27616 op1 = expand_normal (arg0);
27617 if (!address_operand (op1, VOIDmode))
27618 op1 = copy_addr_to_reg (op1);
27619 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
27621 op1 = gen_reg_rtx (SImode);
27622 emit_move_insn (op1, CONST1_RTX (SImode));
27624 /* Emit SImode conditional move. */
27625 if (mode0 == HImode)
27627 op2 = gen_reg_rtx (SImode);
27628 emit_insn (gen_zero_extendhisi2 (op2, op0));
27630 else if (mode0 == SImode)
27631 op2 = op0;
27632 else
27633 op2 = gen_rtx_SUBREG (SImode, op0, 0);
27635 if (target == 0)
27636 target = gen_reg_rtx (SImode);
27638 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
27639 const0_rtx);
27640 emit_insn (gen_rtx_SET (VOIDmode, target,
27641 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
27642 return target;
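/* Sketch of the contract implemented above, assuming the immintrin.h
   mapping for _rdrand32_step:

       unsigned int val;
       int ok = __builtin_ia32_rdrand32_step (&val);

   The hardware zeroes the destination register when no random number is
   available, so the conditional move yields 0 on failure and the constant
   1 on success, with the random value stored through the pointer.  */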
27644 default:
27645 break;
27648 for (i = 0, d = bdesc_special_args;
27649 i < ARRAY_SIZE (bdesc_special_args);
27650 i++, d++)
27651 if (d->code == fcode)
27652 return ix86_expand_special_args_builtin (d, exp, target);
27654 for (i = 0, d = bdesc_args;
27655 i < ARRAY_SIZE (bdesc_args);
27656 i++, d++)
27657 if (d->code == fcode)
27658 switch (fcode)
27660 case IX86_BUILTIN_FABSQ:
27661 case IX86_BUILTIN_COPYSIGNQ:
27662 if (!TARGET_SSE2)
27663 /* Emit a normal call if SSE2 isn't available. */
27664 return expand_call (exp, target, ignore);
27665 default:
27666 return ix86_expand_args_builtin (d, exp, target);
27669 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27670 if (d->code == fcode)
27671 return ix86_expand_sse_comi (d, exp, target);
27673 for (i = 0, d = bdesc_pcmpestr;
27674 i < ARRAY_SIZE (bdesc_pcmpestr);
27675 i++, d++)
27676 if (d->code == fcode)
27677 return ix86_expand_sse_pcmpestr (d, exp, target);
27679 for (i = 0, d = bdesc_pcmpistr;
27680 i < ARRAY_SIZE (bdesc_pcmpistr);
27681 i++, d++)
27682 if (d->code == fcode)
27683 return ix86_expand_sse_pcmpistr (d, exp, target);
27685 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27686 if (d->code == fcode)
27687 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
27688 (enum ix86_builtin_func_type)
27689 d->flag, d->comparison);
27691 gcc_unreachable ();
27694 /* Returns a function decl for a vectorized version of the builtin
27695 function FNDECL with result vector type TYPE_OUT and argument vector
27696 type TYPE_IN, or NULL_TREE if it is not available. */
27698 static tree
27699 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
27700 tree type_in)
27702 enum machine_mode in_mode, out_mode;
27703 int in_n, out_n;
27704 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27706 if (TREE_CODE (type_out) != VECTOR_TYPE
27707 || TREE_CODE (type_in) != VECTOR_TYPE
27708 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
27709 return NULL_TREE;
27711 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27712 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27713 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27714 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27716 switch (fn)
27718 case BUILT_IN_SQRT:
27719 if (out_mode == DFmode && in_mode == DFmode)
27721 if (out_n == 2 && in_n == 2)
27722 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27723 else if (out_n == 4 && in_n == 4)
27724 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
27726 break;
27728 case BUILT_IN_SQRTF:
27729 if (out_mode == SFmode && in_mode == SFmode)
27731 if (out_n == 4 && in_n == 4)
27732 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
27733 else if (out_n == 8 && in_n == 8)
27734 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
27736 break;
27738 case BUILT_IN_LRINT:
27739 if (out_mode == SImode && out_n == 4
27740 && in_mode == DFmode && in_n == 2)
27741 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
27742 break;
27744 case BUILT_IN_LRINTF:
27745 if (out_mode == SImode && in_mode == SFmode)
27747 if (out_n == 4 && in_n == 4)
27748 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
27749 else if (out_n == 8 && in_n == 8)
27750 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
27752 break;
27754 case BUILT_IN_COPYSIGN:
27755 if (out_mode == DFmode && in_mode == DFmode)
27757 if (out_n == 2 && in_n == 2)
27758 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
27759 else if (out_n == 4 && in_n == 4)
27760 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
27762 break;
27764 case BUILT_IN_COPYSIGNF:
27765 if (out_mode == SFmode && in_mode == SFmode)
27767 if (out_n == 4 && in_n == 4)
27768 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
27769 else if (out_n == 8 && in_n == 8)
27770 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
27772 break;
27774 case BUILT_IN_FLOOR:
27775 /* The round insn does not trap on denormals. */
27776 if (flag_trapping_math || !TARGET_ROUND)
27777 break;
27779 if (out_mode == DFmode && in_mode == DFmode)
27781 if (out_n == 2 && in_n == 2)
27782 return ix86_builtins[IX86_BUILTIN_FLOORPD];
27783 else if (out_n == 4 && in_n == 4)
27784 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
27786 break;
27788 case BUILT_IN_FLOORF:
27789 /* The round insn does not trap on denormals. */
27790 if (flag_trapping_math || !TARGET_ROUND)
27791 break;
27793 if (out_mode == SFmode && in_mode == SFmode)
27795 if (out_n == 4 && in_n == 4)
27796 return ix86_builtins[IX86_BUILTIN_FLOORPS];
27797 else if (out_n == 8 && in_n == 8)
27798 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
27800 break;
27802 case BUILT_IN_CEIL:
27803 /* The round insn does not trap on denormals. */
27804 if (flag_trapping_math || !TARGET_ROUND)
27805 break;
27807 if (out_mode == DFmode && in_mode == DFmode)
27809 if (out_n == 2 && in_n == 2)
27810 return ix86_builtins[IX86_BUILTIN_CEILPD];
27811 else if (out_n == 4 && in_n == 4)
27812 return ix86_builtins[IX86_BUILTIN_CEILPD256];
27814 break;
27816 case BUILT_IN_CEILF:
27817 /* The round insn does not trap on denormals. */
27818 if (flag_trapping_math || !TARGET_ROUND)
27819 break;
27821 if (out_mode == SFmode && in_mode == SFmode)
27823 if (out_n == 4 && in_n == 4)
27824 return ix86_builtins[IX86_BUILTIN_CEILPS];
27825 else if (out_n == 8 && in_n == 8)
27826 return ix86_builtins[IX86_BUILTIN_CEILPS256];
27828 break;
27830 case BUILT_IN_TRUNC:
27831 /* The round insn does not trap on denormals. */
27832 if (flag_trapping_math || !TARGET_ROUND)
27833 break;
27835 if (out_mode == DFmode && in_mode == DFmode)
27837 if (out_n == 2 && in_n == 2)
27838 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
27839 else if (out_n == 4 && in_n == 4)
27840 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
27842 break;
27844 case BUILT_IN_TRUNCF:
27845 /* The round insn does not trap on denormals. */
27846 if (flag_trapping_math || !TARGET_ROUND)
27847 break;
27849 if (out_mode == SFmode && in_mode == SFmode)
27851 if (out_n == 4 && in_n == 4)
27852 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
27853 else if (out_n == 8 && in_n == 8)
27854 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
27856 break;
27858 case BUILT_IN_RINT:
27859 /* The round insn does not trap on denormals. */
27860 if (flag_trapping_math || !TARGET_ROUND)
27861 break;
27863 if (out_mode == DFmode && in_mode == DFmode)
27865 if (out_n == 2 && in_n == 2)
27866 return ix86_builtins[IX86_BUILTIN_RINTPD];
27867 else if (out_n == 4 && in_n == 4)
27868 return ix86_builtins[IX86_BUILTIN_RINTPD256];
27870 break;
27872 case BUILT_IN_RINTF:
27873 /* The round insn does not trap on denormals. */
27874 if (flag_trapping_math || !TARGET_ROUND)
27875 break;
27877 if (out_mode == SFmode && in_mode == SFmode)
27879 if (out_n == 4 && in_n == 4)
27880 return ix86_builtins[IX86_BUILTIN_RINTPS];
27881 else if (out_n == 8 && in_n == 8)
27882 return ix86_builtins[IX86_BUILTIN_RINTPS256];
27884 break;
27886 case BUILT_IN_FMA:
27887 if (out_mode == DFmode && in_mode == DFmode)
27889 if (out_n == 2 && in_n == 2)
27890 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
27891 if (out_n == 4 && in_n == 4)
27892 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
27894 break;
27896 case BUILT_IN_FMAF:
27897 if (out_mode == SFmode && in_mode == SFmode)
27899 if (out_n == 4 && in_n == 4)
27900 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
27901 if (out_n == 8 && in_n == 8)
27902 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
27904 break;
27906 default:
27907 break;
27910 /* Dispatch to a handler for a vectorization library. */
27911 if (ix86_veclib_handler)
27912 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
27913 type_in);
27915 return NULL_TREE;
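/* Example of this hook in action: when the vectorizer processes a loop
   calling sqrt () on doubles with SSE2 enabled, asking for a 2 x DFmode
   vectorization of BUILT_IN_SQRT returns the IX86_BUILTIN_SQRTPD decl, so
   the scalar calls are replaced by sqrtpd on whole vectors.  */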
27918 /* Handler for an SVML-style interface to
27919 a library with vectorized intrinsics. */
27921 static tree
27922 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
27924 char name[20];
27925 tree fntype, new_fndecl, args;
27926 unsigned arity;
27927 const char *bname;
27928 enum machine_mode el_mode, in_mode;
27929 int n, in_n;
27931 /* SVML is suitable for unsafe math only. */
27932 if (!flag_unsafe_math_optimizations)
27933 return NULL_TREE;
27935 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27936 n = TYPE_VECTOR_SUBPARTS (type_out);
27937 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27938 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27939 if (el_mode != in_mode
27940 || n != in_n)
27941 return NULL_TREE;
27943 switch (fn)
27945 case BUILT_IN_EXP:
27946 case BUILT_IN_LOG:
27947 case BUILT_IN_LOG10:
27948 case BUILT_IN_POW:
27949 case BUILT_IN_TANH:
27950 case BUILT_IN_TAN:
27951 case BUILT_IN_ATAN:
27952 case BUILT_IN_ATAN2:
27953 case BUILT_IN_ATANH:
27954 case BUILT_IN_CBRT:
27955 case BUILT_IN_SINH:
27956 case BUILT_IN_SIN:
27957 case BUILT_IN_ASINH:
27958 case BUILT_IN_ASIN:
27959 case BUILT_IN_COSH:
27960 case BUILT_IN_COS:
27961 case BUILT_IN_ACOSH:
27962 case BUILT_IN_ACOS:
27963 if (el_mode != DFmode || n != 2)
27964 return NULL_TREE;
27965 break;
27967 case BUILT_IN_EXPF:
27968 case BUILT_IN_LOGF:
27969 case BUILT_IN_LOG10F:
27970 case BUILT_IN_POWF:
27971 case BUILT_IN_TANHF:
27972 case BUILT_IN_TANF:
27973 case BUILT_IN_ATANF:
27974 case BUILT_IN_ATAN2F:
27975 case BUILT_IN_ATANHF:
27976 case BUILT_IN_CBRTF:
27977 case BUILT_IN_SINHF:
27978 case BUILT_IN_SINF:
27979 case BUILT_IN_ASINHF:
27980 case BUILT_IN_ASINF:
27981 case BUILT_IN_COSHF:
27982 case BUILT_IN_COSF:
27983 case BUILT_IN_ACOSHF:
27984 case BUILT_IN_ACOSF:
27985 if (el_mode != SFmode || n != 4)
27986 return NULL_TREE;
27987 break;
27989 default:
27990 return NULL_TREE;
27993 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27995 if (fn == BUILT_IN_LOGF)
27996 strcpy (name, "vmlsLn4");
27997 else if (fn == BUILT_IN_LOG)
27998 strcpy (name, "vmldLn2");
27999 else if (n == 4)
28001 sprintf (name, "vmls%s", bname+10);
28002 name[strlen (name)-1] = '4';
28004 else
28005 sprintf (name, "vmld%s2", bname+10);
28007 /* Convert to uppercase. */
28008 name[4] &= ~0x20;
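/* For example (a sketch of the mangling above, assuming the builtin
   names carry the usual "__builtin_" prefix that bname+10 skips):
   BUILT_IN_SINF with n == 4 becomes "vmlsSin4", BUILT_IN_SIN with
   n == 2 becomes "vmldSin2", and the log cases use the hard-coded
   "vmlsLn4" / "vmldLn2" names. */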
28010 arity = 0;
28011 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
28012 args = TREE_CHAIN (args))
28013 arity++;
28015 if (arity == 1)
28016 fntype = build_function_type_list (type_out, type_in, NULL);
28017 else
28018 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
28020 /* Build a function declaration for the vectorized function. */
28021 new_fndecl = build_decl (BUILTINS_LOCATION,
28022 FUNCTION_DECL, get_identifier (name), fntype);
28023 TREE_PUBLIC (new_fndecl) = 1;
28024 DECL_EXTERNAL (new_fndecl) = 1;
28025 DECL_IS_NOVOPS (new_fndecl) = 1;
28026 TREE_READONLY (new_fndecl) = 1;
28028 return new_fndecl;
28031 /* Handler for an ACML-style interface to
28032 a library with vectorized intrinsics. */
28034 static tree
28035 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
28037 char name[20] = "__vr.._";
28038 tree fntype, new_fndecl, args;
28039 unsigned arity;
28040 const char *bname;
28041 enum machine_mode el_mode, in_mode;
28042 int n, in_n;
28044 /* The ACML is 64-bit only and suitable for unsafe math only, as it
28045 does not correctly support parts of IEEE arithmetic with the
28046 required precision, such as denormals. */
28047 if (!TARGET_64BIT
28048 || !flag_unsafe_math_optimizations)
28049 return NULL_TREE;
28051 el_mode = TYPE_MODE (TREE_TYPE (type_out));
28052 n = TYPE_VECTOR_SUBPARTS (type_out);
28053 in_mode = TYPE_MODE (TREE_TYPE (type_in));
28054 in_n = TYPE_VECTOR_SUBPARTS (type_in);
28055 if (el_mode != in_mode
28056 || n != in_n)
28057 return NULL_TREE;
28059 switch (fn)
28061 case BUILT_IN_SIN:
28062 case BUILT_IN_COS:
28063 case BUILT_IN_EXP:
28064 case BUILT_IN_LOG:
28065 case BUILT_IN_LOG2:
28066 case BUILT_IN_LOG10:
28067 name[4] = 'd';
28068 name[5] = '2';
28069 if (el_mode != DFmode
28070 || n != 2)
28071 return NULL_TREE;
28072 break;
28074 case BUILT_IN_SINF:
28075 case BUILT_IN_COSF:
28076 case BUILT_IN_EXPF:
28077 case BUILT_IN_POWF:
28078 case BUILT_IN_LOGF:
28079 case BUILT_IN_LOG2F:
28080 case BUILT_IN_LOG10F:
28081 name[4] = 's';
28082 name[5] = '4';
28083 if (el_mode != SFmode
28084 || n != 4)
28085 return NULL_TREE;
28086 break;
28088 default:
28089 return NULL_TREE;
28092 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
28093 sprintf (name + 7, "%s", bname+10);
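/* For example, BUILT_IN_SIN yields "__vrd2_sin" and BUILT_IN_SINF
   yields "__vrs4_sinf" (again assuming the "__builtin_" prefix that
   bname+10 skips). */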
28095 arity = 0;
28096 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
28097 args = TREE_CHAIN (args))
28098 arity++;
28100 if (arity == 1)
28101 fntype = build_function_type_list (type_out, type_in, NULL);
28102 else
28103 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
28105 /* Build a function declaration for the vectorized function. */
28106 new_fndecl = build_decl (BUILTINS_LOCATION,
28107 FUNCTION_DECL, get_identifier (name), fntype);
28108 TREE_PUBLIC (new_fndecl) = 1;
28109 DECL_EXTERNAL (new_fndecl) = 1;
28110 DECL_IS_NOVOPS (new_fndecl) = 1;
28111 TREE_READONLY (new_fndecl) = 1;
28113 return new_fndecl;
28117 /* Returns a decl of a function that implements conversion of an integer vector
28118 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
28119 are the types involved when converting according to CODE.
28120 Return NULL_TREE if it is not available. */
28122 static tree
28123 ix86_vectorize_builtin_conversion (unsigned int code,
28124 tree dest_type, tree src_type)
28126 if (! TARGET_SSE2)
28127 return NULL_TREE;
28129 switch (code)
28131 case FLOAT_EXPR:
28132 switch (TYPE_MODE (src_type))
28134 case V4SImode:
28135 switch (TYPE_MODE (dest_type))
28137 case V4SFmode:
28138 return (TYPE_UNSIGNED (src_type)
28139 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
28140 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
28141 case V4DFmode:
28142 return (TYPE_UNSIGNED (src_type)
28143 ? NULL_TREE
28144 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
28145 default:
28146 return NULL_TREE;
28148 break;
28149 case V8SImode:
28150 switch (TYPE_MODE (dest_type))
28152 case V8SFmode:
28153 return (TYPE_UNSIGNED (src_type)
28154 ? NULL_TREE
28155 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
28156 default:
28157 return NULL_TREE;
28159 break;
28160 default:
28161 return NULL_TREE;
28164 case FIX_TRUNC_EXPR:
28165 switch (TYPE_MODE (dest_type))
28167 case V4SImode:
28168 switch (TYPE_MODE (src_type))
28170 case V4SFmode:
28171 return (TYPE_UNSIGNED (dest_type)
28172 ? NULL_TREE
28173 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
28174 case V4DFmode:
28175 return (TYPE_UNSIGNED (dest_type)
28176 ? NULL_TREE
28177 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
28178 default:
28179 return NULL_TREE;
28181 break;
28183 case V8SImode:
28184 switch (TYPE_MODE (src_type))
28186 case V8SFmode:
28187 return (TYPE_UNSIGNED (dest_type)
28188 ? NULL_TREE
28189 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
28190 default:
28191 return NULL_TREE;
28193 break;
28195 default:
28196 return NULL_TREE;
28199 default:
28200 return NULL_TREE;
28203 return NULL_TREE;
28206 /* Returns a decl of a target-specific builtin that implements
28207 the reciprocal of the function, or NULL_TREE if not available. */
28209 static tree
28210 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
28211 bool sqrt ATTRIBUTE_UNUSED)
28213 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
28214 && flag_finite_math_only && !flag_trapping_math
28215 && flag_unsafe_math_optimizations))
28216 return NULL_TREE;
28218 if (md_fn)
28219 /* Machine dependent builtins. */
28220 switch (fn)
28222 /* Vectorized version of sqrt to rsqrt conversion. */
28223 case IX86_BUILTIN_SQRTPS_NR:
28224 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
28226 case IX86_BUILTIN_SQRTPS_NR256:
28227 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
28229 default:
28230 return NULL_TREE;
28232 else
28233 /* Normal builtins. */
28234 switch (fn)
28236 /* Sqrt to rsqrt conversion. */
28237 case BUILT_IN_SQRTF:
28238 return ix86_builtins[IX86_BUILTIN_RSQRTF];
28240 default:
28241 return NULL_TREE;
28245 /* Helper for avx_vpermilps256_operand et al. This is also used by
28246 the expansion functions to turn the parallel back into a mask.
28247 The return value is 0 for no match and the imm8+1 for a match. */
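/* For instance, the V4DFmode identity parallel [0 1 2 3] is encoded
   below as mask 0b1010, so the function returns 0xb (0xa + 1). */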
28250 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
28252 unsigned i, nelt = GET_MODE_NUNITS (mode);
28253 unsigned mask = 0;
28254 unsigned char ipar[8];
28256 if (XVECLEN (par, 0) != (int) nelt)
28257 return 0;
28259 /* Validate that all of the elements are constants, and not totally
28260 out of range. Copy the data into an integral array to make the
28261 subsequent checks easier. */
28262 for (i = 0; i < nelt; ++i)
28264 rtx er = XVECEXP (par, 0, i);
28265 unsigned HOST_WIDE_INT ei;
28267 if (!CONST_INT_P (er))
28268 return 0;
28269 ei = INTVAL (er);
28270 if (ei >= nelt)
28271 return 0;
28272 ipar[i] = ei;
28275 switch (mode)
28277 case V4DFmode:
28278 /* In the 256-bit DFmode case, we can only move elements within
28279 a 128-bit lane. */
28280 for (i = 0; i < 2; ++i)
28282 if (ipar[i] >= 2)
28283 return 0;
28284 mask |= ipar[i] << i;
28286 for (i = 2; i < 4; ++i)
28288 if (ipar[i] < 2)
28289 return 0;
28290 mask |= (ipar[i] - 2) << i;
28292 break;
28294 case V8SFmode:
28295 /* In the 256-bit SFmode case, we have full freedom of movement
28296 within the low 128-bit lane, but the high 128-bit lane must
28297 mirror the exact same pattern. */
28298 for (i = 0; i < 4; ++i)
28299 if (ipar[i] + 4 != ipar[i + 4])
28300 return 0;
28301 nelt = 4;
28302 /* FALLTHRU */
28304 case V2DFmode:
28305 case V4SFmode:
28306 /* In the 128-bit case, we've full freedom in the placement of
28307 the elements from the source operand. */
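/* nelt / 2 is the number of mask bits per element: two bits for
   V4SFmode (and V8SFmode after the adjustment above), one bit for
   V2DFmode. */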
28308 for (i = 0; i < nelt; ++i)
28309 mask |= ipar[i] << (i * (nelt / 2));
28310 break;
28312 default:
28313 gcc_unreachable ();
28316 /* Make sure success has a non-zero value by adding one. */
28317 return mask + 1;
28320 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
28321 the expansion functions to turn the parallel back into a mask.
28322 The return value is 0 for no match and the imm8+1 for a match. */
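/* For instance, with V4DFmode the parallel [2 3 4 5] (high lane of
   the first operand followed by the low lane of the second) encodes
   as mask 0x21, so the function returns 0x22. */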
28325 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
28327 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
28328 unsigned mask = 0;
28329 unsigned char ipar[8];
28331 if (XVECLEN (par, 0) != (int) nelt)
28332 return 0;
28334 /* Validate that all of the elements are constants, and not totally
28335 out of range. Copy the data into an integral array to make the
28336 subsequent checks easier. */
28337 for (i = 0; i < nelt; ++i)
28339 rtx er = XVECEXP (par, 0, i);
28340 unsigned HOST_WIDE_INT ei;
28342 if (!CONST_INT_P (er))
28343 return 0;
28344 ei = INTVAL (er);
28345 if (ei >= 2 * nelt)
28346 return 0;
28347 ipar[i] = ei;
28350 /* Validate that each half of the permute selects consecutive elements. */
28351 for (i = 0; i < nelt2 - 1; ++i)
28352 if (ipar[i] + 1 != ipar[i + 1])
28353 return 0;
28354 for (i = nelt2; i < nelt - 1; ++i)
28355 if (ipar[i] + 1 != ipar[i + 1])
28356 return 0;
28358 /* Reconstruct the mask. */
28359 for (i = 0; i < 2; ++i)
28361 unsigned e = ipar[i * nelt2];
28362 if (e % nelt2)
28363 return 0;
28364 e /= nelt2;
28365 mask |= e << (i * 4);
28368 /* Make sure success has a non-zero value by adding one. */
28369 return mask + 1;
28373 /* Store OPERAND to the memory after reload is completed. This means
28374 that we can't easily use assign_stack_local. */
28376 ix86_force_to_memory (enum machine_mode mode, rtx operand)
28378 rtx result;
28380 gcc_assert (reload_completed);
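/* If the red zone is available, store the operand below the stack
   pointer without adjusting it; the -RED_ZONE_SIZE offset keeps the
   slot inside the red zone. */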
28381 if (ix86_using_red_zone ())
28383 result = gen_rtx_MEM (mode,
28384 gen_rtx_PLUS (Pmode,
28385 stack_pointer_rtx,
28386 GEN_INT (-RED_ZONE_SIZE)));
28387 emit_move_insn (result, operand);
28389 else if (TARGET_64BIT)
28391 switch (mode)
28393 case HImode:
28394 case SImode:
28395 operand = gen_lowpart (DImode, operand);
28396 /* FALLTHRU */
28397 case DImode:
28398 emit_insn (
28399 gen_rtx_SET (VOIDmode,
28400 gen_rtx_MEM (DImode,
28401 gen_rtx_PRE_DEC (DImode,
28402 stack_pointer_rtx)),
28403 operand));
28404 break;
28405 default:
28406 gcc_unreachable ();
28408 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28410 else
28412 switch (mode)
28414 case DImode:
28416 rtx operands[2];
28417 split_double_mode (mode, &operand, 1, operands, operands + 1);
28418 emit_insn (
28419 gen_rtx_SET (VOIDmode,
28420 gen_rtx_MEM (SImode,
28421 gen_rtx_PRE_DEC (Pmode,
28422 stack_pointer_rtx)),
28423 operands[1]));
28424 emit_insn (
28425 gen_rtx_SET (VOIDmode,
28426 gen_rtx_MEM (SImode,
28427 gen_rtx_PRE_DEC (Pmode,
28428 stack_pointer_rtx)),
28429 operands[0]));
28431 break;
28432 case HImode:
28433 /* Store HImodes as SImodes. */
28434 operand = gen_lowpart (SImode, operand);
28435 /* FALLTHRU */
28436 case SImode:
28437 emit_insn (
28438 gen_rtx_SET (VOIDmode,
28439 gen_rtx_MEM (GET_MODE (operand),
28440 gen_rtx_PRE_DEC (SImode,
28441 stack_pointer_rtx)),
28442 operand));
28443 break;
28444 default:
28445 gcc_unreachable ();
28447 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28449 return result;
28452 /* Free operand from the memory. */
28453 void
28454 ix86_free_from_memory (enum machine_mode mode)
28456 if (!ix86_using_red_zone ())
28458 int size;
28460 if (mode == DImode || TARGET_64BIT)
28461 size = 8;
28462 else
28463 size = 4;
28464 /* Use LEA to deallocate stack space. In peephole2 it will be converted
28465 to a pop or add instruction if registers are available. */
28466 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
28467 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
28468 GEN_INT (size))));
28472 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
28474 Put float CONST_DOUBLE in the constant pool instead of fp regs.
28475 QImode must go into class Q_REGS.
28476 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
28477 movdf to do mem-to-mem moves through integer regs. */
28479 static reg_class_t
28480 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
28482 enum machine_mode mode = GET_MODE (x);
28484 /* We're only allowed to return a subclass of CLASS. Many of the
28485 following checks fail for NO_REGS, so eliminate that early. */
28486 if (regclass == NO_REGS)
28487 return NO_REGS;
28489 /* All classes can load zeros. */
28490 if (x == CONST0_RTX (mode))
28491 return regclass;
28493 /* Force constants into memory if we are loading a (nonzero) constant into
28494 an MMX or SSE register. This is because there are no MMX/SSE instructions
28495 to load from a constant. */
28496 if (CONSTANT_P (x)
28497 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
28498 return NO_REGS;
28500 /* Prefer SSE regs only, if we can use them for math. */
28501 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
28502 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
28504 /* Floating-point constants need more complex checks. */
28505 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
28507 /* General regs can load everything. */
28508 if (reg_class_subset_p (regclass, GENERAL_REGS))
28509 return regclass;
28511 /* Floats can load 0 and 1 plus some others. Note that we eliminated
28512 zero above. We only want to wind up preferring 80387 registers if
28513 we plan on doing computation with them. */
28514 if (TARGET_80387
28515 && standard_80387_constant_p (x) > 0)
28517 /* Limit class to non-sse. */
28518 if (regclass == FLOAT_SSE_REGS)
28519 return FLOAT_REGS;
28520 if (regclass == FP_TOP_SSE_REGS)
28521 return FP_TOP_REG;
28522 if (regclass == FP_SECOND_SSE_REGS)
28523 return FP_SECOND_REG;
28524 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
28525 return regclass;
28528 return NO_REGS;
28531 /* Generally when we see PLUS here, it's the function invariant
28532 (plus soft-fp const_int). Which can only be computed into general
28533 regs. */
28534 if (GET_CODE (x) == PLUS)
28535 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
28537 /* QImode constants are easy to load, but non-constant QImode data
28538 must go into Q_REGS. */
28539 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
28541 if (reg_class_subset_p (regclass, Q_REGS))
28542 return regclass;
28543 if (reg_class_subset_p (Q_REGS, regclass))
28544 return Q_REGS;
28545 return NO_REGS;
28548 return regclass;
28551 /* Discourage putting floating-point values in SSE registers unless
28552 SSE math is being used, and likewise for the 387 registers. */
28553 static reg_class_t
28554 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
28556 enum machine_mode mode = GET_MODE (x);
28558 /* Restrict the output reload class to the register bank that we are doing
28559 math on. If we would like not to return a subset of CLASS, reject this
28560 alternative: if reload cannot do this, it will still use its choice. */
28561 mode = GET_MODE (x);
28562 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
28563 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
28565 if (X87_FLOAT_MODE_P (mode))
28567 if (regclass == FP_TOP_SSE_REGS)
28568 return FP_TOP_REG;
28569 else if (regclass == FP_SECOND_SSE_REGS)
28570 return FP_SECOND_REG;
28571 else
28572 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
28575 return regclass;
28578 static reg_class_t
28579 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
28580 enum machine_mode mode,
28581 secondary_reload_info *sri ATTRIBUTE_UNUSED)
28583 /* QImode spills from non-QI registers require an
28584 intermediate register on 32-bit targets. */
28585 if (!TARGET_64BIT
28586 && !in_p && mode == QImode
28587 && (rclass == GENERAL_REGS
28588 || rclass == LEGACY_REGS
28589 || rclass == INDEX_REGS))
28591 int regno;
28593 if (REG_P (x))
28594 regno = REGNO (x);
28595 else
28596 regno = -1;
28598 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
28599 regno = true_regnum (x);
28601 /* Return Q_REGS if the operand is in memory. */
28602 if (regno == -1)
28603 return Q_REGS;
28606 /* This condition handles the corner case where an expression involving
28607 pointers gets vectorized. We're trying to use the address of a
28608 stack slot as a vector initializer.
28610 (set (reg:V2DI 74 [ vect_cst_.2 ])
28611 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
28613 Eventually frame gets turned into sp+offset like this:
28615 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28616 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28617 (const_int 392 [0x188]))))
28619 That later gets turned into:
28621 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28622 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28623 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
28625 We'll have the following reload recorded:
28627 Reload 0: reload_in (DI) =
28628 (plus:DI (reg/f:DI 7 sp)
28629 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
28630 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28631 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
28632 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
28633 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28634 reload_reg_rtx: (reg:V2DI 22 xmm1)
28636 Which isn't going to work since SSE instructions can't handle scalar
28637 additions. Returning GENERAL_REGS forces the addition into integer
28638 register and reload can handle subsequent reloads without problems. */
28640 if (in_p && GET_CODE (x) == PLUS
28641 && SSE_CLASS_P (rclass)
28642 && SCALAR_INT_MODE_P (mode))
28643 return GENERAL_REGS;
28645 return NO_REGS;
28648 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
28650 static bool
28651 ix86_class_likely_spilled_p (reg_class_t rclass)
28653 switch (rclass)
28655 case AREG:
28656 case DREG:
28657 case CREG:
28658 case BREG:
28659 case AD_REGS:
28660 case SIREG:
28661 case DIREG:
28662 case SSE_FIRST_REG:
28663 case FP_TOP_REG:
28664 case FP_SECOND_REG:
28665 return true;
28667 default:
28668 break;
28671 return false;
28674 /* If we are copying between general and FP registers, we need a memory
28675 location. The same is true for SSE and MMX registers.
28677 To optimize register_move_cost performance, allow inline variant.
28679 The macro can't work reliably when one of the CLASSES is a class containing
28680 registers from multiple units (SSE, MMX, integer). We avoid this by never
28681 combining those units in a single alternative in the machine description.
28682 Ensure that this constraint holds to avoid unexpected surprises.
28684 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
28685 enforce these sanity checks. */
28687 static inline bool
28688 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28689 enum machine_mode mode, int strict)
28691 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
28692 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
28693 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
28694 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
28695 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
28696 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
28698 gcc_assert (!strict);
28699 return true;
28702 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
28703 return true;
28705 /* ??? This is a lie. We do have moves between mmx/general, and for
28706 mmx/sse2. But by saying we need secondary memory we discourage the
28707 register allocator from using the mmx registers unless needed. */
28708 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
28709 return true;
28711 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28713 /* SSE1 doesn't have any direct moves from other classes. */
28714 if (!TARGET_SSE2)
28715 return true;
28717 /* If the target says that inter-unit moves are more expensive
28718 than moving through memory, then don't generate them. */
28719 if (!TARGET_INTER_UNIT_MOVES)
28720 return true;
28722 /* Between SSE and general, we have moves no larger than word size. */
28723 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
28724 return true;
28727 return false;
28730 bool
28731 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28732 enum machine_mode mode, int strict)
28734 return inline_secondary_memory_needed (class1, class2, mode, strict);
28737 /* Return true if the registers in CLASS cannot represent the change from
28738 modes FROM to TO. */
28740 bool
28741 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
28742 enum reg_class regclass)
28744 if (from == to)
28745 return false;
28747 /* x87 registers can't do subreg at all, as all values are reformatted
28748 to extended precision. */
28749 if (MAYBE_FLOAT_CLASS_P (regclass))
28750 return true;
28752 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
28754 /* Vector registers do not support QI or HImode loads. If we don't
28755 disallow a change to these modes, reload will assume it's ok to
28756 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
28757 the vec_dupv4hi pattern. */
28758 if (GET_MODE_SIZE (from) < 4)
28759 return true;
28761 /* Vector registers do not support subreg with nonzero offsets, which
28762 are otherwise valid for integer registers. Since we can't see
28763 whether we have a nonzero offset from here, prohibit all
28764 nonparadoxical subregs changing size. */
28765 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
28766 return true;
28769 return false;
28772 /* Return the cost of moving data of mode M between a
28773 register and memory. A value of 2 is the default; this cost is
28774 relative to those in `REGISTER_MOVE_COST'.
28776 This function is used extensively by register_move_cost that is used to
28777 build tables at startup. Make it inline in this case.
28778 When IN is 2, return maximum of in and out move cost.
28780 If moving between registers and memory is more expensive than
28781 between two registers, you should define this macro to express the
28782 relative cost.
28784 Also model the increased cost of moving QImode registers in non
28785 Q_REGS classes.
28787 static inline int
28788 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
28789 int in)
28791 int cost;
28792 if (FLOAT_CLASS_P (regclass))
28794 int index;
28795 switch (mode)
28797 case SFmode:
28798 index = 0;
28799 break;
28800 case DFmode:
28801 index = 1;
28802 break;
28803 case XFmode:
28804 index = 2;
28805 break;
28806 default:
28807 return 100;
28809 if (in == 2)
28810 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
28811 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
28813 if (SSE_CLASS_P (regclass))
28815 int index;
28816 switch (GET_MODE_SIZE (mode))
28818 case 4:
28819 index = 0;
28820 break;
28821 case 8:
28822 index = 1;
28823 break;
28824 case 16:
28825 index = 2;
28826 break;
28827 default:
28828 return 100;
28830 if (in == 2)
28831 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
28832 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
28834 if (MMX_CLASS_P (regclass))
28836 int index;
28837 switch (GET_MODE_SIZE (mode))
28839 case 4:
28840 index = 0;
28841 break;
28842 case 8:
28843 index = 1;
28844 break;
28845 default:
28846 return 100;
28848 if (in == 2)
28849 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
28850 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
28852 switch (GET_MODE_SIZE (mode))
28854 case 1:
28855 if (Q_CLASS_P (regclass) || TARGET_64BIT)
28857 if (!in)
28858 return ix86_cost->int_store[0];
28859 if (TARGET_PARTIAL_REG_DEPENDENCY
28860 && optimize_function_for_speed_p (cfun))
28861 cost = ix86_cost->movzbl_load;
28862 else
28863 cost = ix86_cost->int_load[0];
28864 if (in == 2)
28865 return MAX (cost, ix86_cost->int_store[0]);
28866 return cost;
28868 else
28870 if (in == 2)
28871 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
28872 if (in)
28873 return ix86_cost->movzbl_load;
28874 else
28875 return ix86_cost->int_store[0] + 4;
28877 break;
28878 case 2:
28879 if (in == 2)
28880 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
28881 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
28882 default:
28883 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
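/* For instance, on a 32-bit target a DImode value is costed as two
   word-sized moves by the division below. */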
28884 if (mode == TFmode)
28885 mode = XFmode;
28886 if (in == 2)
28887 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
28888 else if (in)
28889 cost = ix86_cost->int_load[2];
28890 else
28891 cost = ix86_cost->int_store[2];
28892 return (cost * (((int) GET_MODE_SIZE (mode)
28893 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
28897 static int
28898 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
28899 bool in)
28901 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
28905 /* Return the cost of moving data from a register in class CLASS1 to
28906 one in class CLASS2.
28908 It is not required that the cost always equal 2 when FROM is the same as TO;
28909 on some machines it is expensive to move between registers if they are not
28910 general registers. */
28912 static int
28913 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
28914 reg_class_t class2_i)
28916 enum reg_class class1 = (enum reg_class) class1_i;
28917 enum reg_class class2 = (enum reg_class) class2_i;
28919 /* In case we require secondary memory, compute cost of the store followed
28920 by load. In order to avoid bad register allocation choices, we need
28921 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
28923 if (inline_secondary_memory_needed (class1, class2, mode, 0))
28925 int cost = 1;
28927 cost += inline_memory_move_cost (mode, class1, 2);
28928 cost += inline_memory_move_cost (mode, class2, 2);
28930 /* In case of copying from a general purpose register we may emit multiple
28931 stores followed by a single load, causing a memory size mismatch stall.
28932 Count this as an arbitrarily high cost of 20. */
28933 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
28934 cost += 20;
28936 /* In the case of FP/MMX moves, the registers actually overlap, and we
28937 have to switch modes in order to treat them differently. */
28938 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
28939 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
28940 cost += 20;
28942 return cost;
28945 /* Moves between SSE/MMX and integer unit are expensive. */
28946 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
28947 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28949 /* ??? By keeping returned value relatively high, we limit the number
28950 of moves between integer and MMX/SSE registers for all targets.
28951 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
28952 where integer modes in MMX/SSE registers are not tieable
28953 because of missing QImode and HImode moves to, from or between
28954 MMX/SSE registers. */
28955 return MAX (8, ix86_cost->mmxsse_to_integer);
28957 if (MAYBE_FLOAT_CLASS_P (class1))
28958 return ix86_cost->fp_move;
28959 if (MAYBE_SSE_CLASS_P (class1))
28960 return ix86_cost->sse_move;
28961 if (MAYBE_MMX_CLASS_P (class1))
28962 return ix86_cost->mmx_move;
28963 return 2;
28966 /* Return TRUE if hard register REGNO can hold a value of machine-mode
28967 MODE. */
28969 bool
28970 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
28972 /* Only the flags register can hold CCmode values, and it can hold nothing else. */
28973 if (CC_REGNO_P (regno))
28974 return GET_MODE_CLASS (mode) == MODE_CC;
28975 if (GET_MODE_CLASS (mode) == MODE_CC
28976 || GET_MODE_CLASS (mode) == MODE_RANDOM
28977 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
28978 return false;
28979 if (FP_REGNO_P (regno))
28980 return VALID_FP_MODE_P (mode);
28981 if (SSE_REGNO_P (regno))
28983 /* We implement the move patterns for all vector modes into and
28984 out of SSE registers, even when no operation instructions
28985 are available. OImode move is available only when AVX is
28986 enabled. */
28987 return ((TARGET_AVX && mode == OImode)
28988 || VALID_AVX256_REG_MODE (mode)
28989 || VALID_SSE_REG_MODE (mode)
28990 || VALID_SSE2_REG_MODE (mode)
28991 || VALID_MMX_REG_MODE (mode)
28992 || VALID_MMX_REG_MODE_3DNOW (mode));
28994 if (MMX_REGNO_P (regno))
28996 /* We implement the move patterns for 3DNOW modes even in MMX mode,
28997 so if the register is available at all, then we can move data of
28998 the given mode into or out of it. */
28999 return (VALID_MMX_REG_MODE (mode)
29000 || VALID_MMX_REG_MODE_3DNOW (mode));
29003 if (mode == QImode)
29005 /* Take care of QImode values - they can be in non-QI regs,
29006 but then they do cause partial register stalls. */
29007 if (regno <= BX_REG || TARGET_64BIT)
29008 return true;
29009 if (!TARGET_PARTIAL_REG_STALL)
29010 return true;
29011 return !can_create_pseudo_p ();
29013 /* We handle both integer and floats in the general purpose registers. */
29014 else if (VALID_INT_MODE_P (mode))
29015 return true;
29016 else if (VALID_FP_MODE_P (mode))
29017 return true;
29018 else if (VALID_DFP_MODE_P (mode))
29019 return true;
29020 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
29021 on to use that value in smaller contexts, this can easily force a
29022 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
29023 supporting DImode, allow it. */
29024 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
29025 return true;
29027 return false;
29030 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
29031 tieable integer mode. */
29033 static bool
29034 ix86_tieable_integer_mode_p (enum machine_mode mode)
29036 switch (mode)
29038 case HImode:
29039 case SImode:
29040 return true;
29042 case QImode:
29043 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
29045 case DImode:
29046 return TARGET_64BIT;
29048 default:
29049 return false;
29053 /* Return true if MODE1 is accessible in a register that can hold MODE2
29054 without copying. That is, all register classes that can hold MODE2
29055 can also hold MODE1. */
29057 bool
29058 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
29060 if (mode1 == mode2)
29061 return true;
29063 if (ix86_tieable_integer_mode_p (mode1)
29064 && ix86_tieable_integer_mode_p (mode2))
29065 return true;
29067 /* MODE2 being XFmode implies fp stack or general regs, which means we
29068 can tie any smaller floating point modes to it. Note that we do not
29069 tie this with TFmode. */
29070 if (mode2 == XFmode)
29071 return mode1 == SFmode || mode1 == DFmode;
29073 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
29074 that we can tie it with SFmode. */
29075 if (mode2 == DFmode)
29076 return mode1 == SFmode;
29078 /* If MODE2 is only appropriate for an SSE register, then tie with
29079 any other mode acceptable to SSE registers. */
29080 if (GET_MODE_SIZE (mode2) == 16
29081 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
29082 return (GET_MODE_SIZE (mode1) == 16
29083 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
29085 /* If MODE2 is appropriate for an MMX register, then tie
29086 with any other mode acceptable to MMX registers. */
29087 if (GET_MODE_SIZE (mode2) == 8
29088 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
29089 return (GET_MODE_SIZE (mode1) == 8
29090 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
29092 return false;
29095 /* Compute a (partial) cost for rtx X. Return true if the complete
29096 cost has been computed, and false if subexpressions should be
29097 scanned. In either case, *TOTAL contains the cost result. */
29099 static bool
29100 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
29102 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
29103 enum machine_mode mode = GET_MODE (x);
29104 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
29106 switch (code)
29108 case CONST_INT:
29109 case CONST:
29110 case LABEL_REF:
29111 case SYMBOL_REF:
29112 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
29113 *total = 3;
29114 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
29115 *total = 2;
29116 else if (flag_pic && SYMBOLIC_CONST (x)
29117 && (!TARGET_64BIT
29118 || (GET_CODE (x) != LABEL_REF
29119 && (GET_CODE (x) != SYMBOL_REF
29120 || !SYMBOL_REF_LOCAL_P (x)))))
29121 *total = 1;
29122 else
29123 *total = 0;
29124 return true;
29126 case CONST_DOUBLE:
29127 if (mode == VOIDmode)
29128 *total = 0;
29129 else
29130 switch (standard_80387_constant_p (x))
29132 case 1: /* 0.0 */
29133 *total = 1;
29134 break;
29135 default: /* Other constants */
29136 *total = 2;
29137 break;
29138 case 0:
29139 case -1:
29140 /* Start with (MEM (SYMBOL_REF)), since that's where
29141 it'll probably end up. Add a penalty for size. */
29142 *total = (COSTS_N_INSNS (1)
29143 + (flag_pic != 0 && !TARGET_64BIT)
29144 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
29145 break;
29147 return true;
29149 case ZERO_EXTEND:
29150 /* Zero extension is often completely free on x86_64, so make
29151 it as cheap as possible. */
29152 if (TARGET_64BIT && mode == DImode
29153 && GET_MODE (XEXP (x, 0)) == SImode)
29154 *total = 1;
29155 else if (TARGET_ZERO_EXTEND_WITH_AND)
29156 *total = cost->add;
29157 else
29158 *total = cost->movzx;
29159 return false;
29161 case SIGN_EXTEND:
29162 *total = cost->movsx;
29163 return false;
29165 case ASHIFT:
29166 if (CONST_INT_P (XEXP (x, 1))
29167 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
29169 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
29170 if (value == 1)
29172 *total = cost->add;
29173 return false;
29175 if ((value == 2 || value == 3)
29176 && cost->lea <= cost->shift_const)
29178 *total = cost->lea;
29179 return false;
29182 /* FALLTHRU */
29184 case ROTATE:
29185 case ASHIFTRT:
29186 case LSHIFTRT:
29187 case ROTATERT:
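/* On a 32-bit target a DImode shift is split into a double-word
   sequence, hence roughly twice the single-shift cost below, plus
   extra insns when the count is large or variable. */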
29188 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
29190 if (CONST_INT_P (XEXP (x, 1)))
29192 if (INTVAL (XEXP (x, 1)) > 32)
29193 *total = cost->shift_const + COSTS_N_INSNS (2);
29194 else
29195 *total = cost->shift_const * 2;
29197 else
29199 if (GET_CODE (XEXP (x, 1)) == AND)
29200 *total = cost->shift_var * 2;
29201 else
29202 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
29205 else
29207 if (CONST_INT_P (XEXP (x, 1)))
29208 *total = cost->shift_const;
29209 else
29210 *total = cost->shift_var;
29212 return false;
29214 case FMA:
29216 rtx sub;
29218 gcc_assert (FLOAT_MODE_P (mode));
29219 gcc_assert (TARGET_FMA || TARGET_FMA4);
29221 /* ??? SSE scalar/vector cost should be used here. */
29222 /* ??? Bald assumption that fma has the same cost as fmul. */
29223 *total = cost->fmul;
29224 *total += rtx_cost (XEXP (x, 1), FMA, speed);
29226 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
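/* E.g. (fma (neg a) b c) corresponds to an fnmadd-style insn, so the
   NEG is stripped before costing the operand. */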
29227 sub = XEXP (x, 0);
29228 if (GET_CODE (sub) == NEG)
29229 sub = XEXP (sub, 0);
29230 *total += rtx_cost (sub, FMA, speed);
29232 sub = XEXP (x, 2);
29233 if (GET_CODE (sub) == NEG)
29234 sub = XEXP (sub, 0);
29235 *total += rtx_cost (sub, FMA, speed);
29236 return true;
29239 case MULT:
29240 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29242 /* ??? SSE scalar cost should be used here. */
29243 *total = cost->fmul;
29244 return false;
29246 else if (X87_FLOAT_MODE_P (mode))
29248 *total = cost->fmul;
29249 return false;
29251 else if (FLOAT_MODE_P (mode))
29253 /* ??? SSE vector cost should be used here. */
29254 *total = cost->fmul;
29255 return false;
29257 else
29259 rtx op0 = XEXP (x, 0);
29260 rtx op1 = XEXP (x, 1);
29261 int nbits;
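/* The loop below is a population count of a constant multiplier;
   each set bit contributes cost->mult_bit to the total. */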
29262 if (CONST_INT_P (XEXP (x, 1)))
29264 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
29265 for (nbits = 0; value != 0; value &= value - 1)
29266 nbits++;
29268 else
29269 /* This is arbitrary. */
29270 nbits = 7;
29272 /* Compute costs correctly for widening multiplication. */
29273 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
29274 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
29275 == GET_MODE_SIZE (mode))
29277 int is_mulwiden = 0;
29278 enum machine_mode inner_mode = GET_MODE (op0);
29280 if (GET_CODE (op0) == GET_CODE (op1))
29281 is_mulwiden = 1, op1 = XEXP (op1, 0);
29282 else if (CONST_INT_P (op1))
29284 if (GET_CODE (op0) == SIGN_EXTEND)
29285 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
29286 == INTVAL (op1);
29287 else
29288 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
29291 if (is_mulwiden)
29292 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
29295 *total = (cost->mult_init[MODE_INDEX (mode)]
29296 + nbits * cost->mult_bit
29297 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
29299 return true;
29302 case DIV:
29303 case UDIV:
29304 case MOD:
29305 case UMOD:
29306 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29307 /* ??? SSE cost should be used here. */
29308 *total = cost->fdiv;
29309 else if (X87_FLOAT_MODE_P (mode))
29310 *total = cost->fdiv;
29311 else if (FLOAT_MODE_P (mode))
29312 /* ??? SSE vector cost should be used here. */
29313 *total = cost->fdiv;
29314 else
29315 *total = cost->divide[MODE_INDEX (mode)];
29316 return false;
29318 case PLUS:
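/* Address-like trees such as (plus (plus (mult reg 4) reg) disp)
   match a single lea, so the cases below cost them as one lea plus
   the operand costs. */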
29319 if (GET_MODE_CLASS (mode) == MODE_INT
29320 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
29322 if (GET_CODE (XEXP (x, 0)) == PLUS
29323 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
29324 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
29325 && CONSTANT_P (XEXP (x, 1)))
29327 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
29328 if (val == 2 || val == 4 || val == 8)
29330 *total = cost->lea;
29331 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
29332 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
29333 outer_code, speed);
29334 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29335 return true;
29338 else if (GET_CODE (XEXP (x, 0)) == MULT
29339 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
29341 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
29342 if (val == 2 || val == 4 || val == 8)
29344 *total = cost->lea;
29345 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
29346 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29347 return true;
29350 else if (GET_CODE (XEXP (x, 0)) == PLUS)
29352 *total = cost->lea;
29353 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
29354 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
29355 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29356 return true;
29359 /* FALLTHRU */
29361 case MINUS:
29362 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29364 /* ??? SSE cost should be used here. */
29365 *total = cost->fadd;
29366 return false;
29368 else if (X87_FLOAT_MODE_P (mode))
29370 *total = cost->fadd;
29371 return false;
29373 else if (FLOAT_MODE_P (mode))
29375 /* ??? SSE vector cost should be used here. */
29376 *total = cost->fadd;
29377 return false;
29379 /* FALLTHRU */
29381 case AND:
29382 case IOR:
29383 case XOR:
29384 if (!TARGET_64BIT && mode == DImode)
29386 *total = (cost->add * 2
29387 + (rtx_cost (XEXP (x, 0), outer_code, speed)
29388 << (GET_MODE (XEXP (x, 0)) != DImode))
29389 + (rtx_cost (XEXP (x, 1), outer_code, speed)
29390 << (GET_MODE (XEXP (x, 1)) != DImode)));
29391 return true;
29393 /* FALLTHRU */
29395 case NEG:
29396 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29398 /* ??? SSE cost should be used here. */
29399 *total = cost->fchs;
29400 return false;
29402 else if (X87_FLOAT_MODE_P (mode))
29404 *total = cost->fchs;
29405 return false;
29407 else if (FLOAT_MODE_P (mode))
29409 /* ??? SSE vector cost should be used here. */
29410 *total = cost->fchs;
29411 return false;
29413 /* FALLTHRU */
29415 case NOT:
29416 if (!TARGET_64BIT && mode == DImode)
29417 *total = cost->add * 2;
29418 else
29419 *total = cost->add;
29420 return false;
29422 case COMPARE:
29423 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
29424 && XEXP (XEXP (x, 0), 1) == const1_rtx
29425 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
29426 && XEXP (x, 1) == const0_rtx)
29428 /* This kind of construct is implemented using test[bwl].
29429 Treat it as if we had an AND. */
29430 *total = (cost->add
29431 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
29432 + rtx_cost (const1_rtx, outer_code, speed));
29433 return true;
29435 return false;
29437 case FLOAT_EXTEND:
29438 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
29439 *total = 0;
29440 return false;
29442 case ABS:
29443 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29444 /* ??? SSE cost should be used here. */
29445 *total = cost->fabs;
29446 else if (X87_FLOAT_MODE_P (mode))
29447 *total = cost->fabs;
29448 else if (FLOAT_MODE_P (mode))
29449 /* ??? SSE vector cost should be used here. */
29450 *total = cost->fabs;
29451 return false;
29453 case SQRT:
29454 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29455 /* ??? SSE cost should be used here. */
29456 *total = cost->fsqrt;
29457 else if (X87_FLOAT_MODE_P (mode))
29458 *total = cost->fsqrt;
29459 else if (FLOAT_MODE_P (mode))
29460 /* ??? SSE vector cost should be used here. */
29461 *total = cost->fsqrt;
29462 return false;
29464 case UNSPEC:
29465 if (XINT (x, 1) == UNSPEC_TP)
29466 *total = 0;
29467 return false;
29469 case VEC_SELECT:
29470 case VEC_CONCAT:
29471 case VEC_MERGE:
29472 case VEC_DUPLICATE:
29473 /* ??? Assume all of these vector manipulation patterns are
29474 recognizable. In which case they all pretty much have the
29475 same cost. */
29476 *total = COSTS_N_INSNS (1);
29477 return true;
29479 default:
29480 return false;
29484 #if TARGET_MACHO
29486 static int current_machopic_label_num;
29488 /* Given a symbol name and its associated stub, write out the
29489 definition of the stub. */
29491 void
29492 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29494 unsigned int length;
29495 char *binder_name, *symbol_name, lazy_ptr_name[32];
29496 int label = ++current_machopic_label_num;
29498 /* For 64-bit we shouldn't get here. */
29499 gcc_assert (!TARGET_64BIT);
29501 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29502 symb = targetm.strip_name_encoding (symb);
29504 length = strlen (stub);
29505 binder_name = XALLOCAVEC (char, length + 32);
29506 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
29508 length = strlen (symb);
29509 symbol_name = XALLOCAVEC (char, length + 32);
29510 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29512 sprintf (lazy_ptr_name, "L%d$lz", label);
29514 if (MACHOPIC_ATT_STUB)
29515 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
29516 else if (MACHOPIC_PURE)
29518 if (TARGET_DEEP_BRANCH_PREDICTION)
29519 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
29520 else
29521 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
29523 else
29524 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
29526 fprintf (file, "%s:\n", stub);
29527 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29529 if (MACHOPIC_ATT_STUB)
29531 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
29533 else if (MACHOPIC_PURE)
29535 /* PIC stub. */
29536 if (TARGET_DEEP_BRANCH_PREDICTION)
29538 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29539 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
29540 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
29541 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label);
29543 else
29545 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */
29546 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label);
29547 fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label);
29549 fprintf (file, "\tjmp\t*%%ecx\n");
29551 else
29552 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
29554 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
29555 it needs no stub-binding-helper. */
29556 if (MACHOPIC_ATT_STUB)
29557 return;
29559 fprintf (file, "%s:\n", binder_name);
29561 if (MACHOPIC_PURE)
29563 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
29564 fprintf (file, "\tpushl\t%%ecx\n");
29566 else
29567 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
29569 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
29571 /* N.B. Keep the correspondence of these
29572 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
29573 old-pic/new-pic/non-pic stubs; altering this will break
29574 compatibility with existing dylibs. */
29575 if (MACHOPIC_PURE)
29577 /* PIC stubs. */
29578 if (TARGET_DEEP_BRANCH_PREDICTION)
29579 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29580 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
29581 else
29582 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ebx". */
29583 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
29585 else
29586 /* 16-byte -mdynamic-no-pic stub. */
29587 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
29589 fprintf (file, "%s:\n", lazy_ptr_name);
29590 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29591 fprintf (file, ASM_LONG "%s\n", binder_name);
29593 #endif /* TARGET_MACHO */
29595 /* Order the registers for register allocator. */
29597 void
29598 x86_order_regs_for_local_alloc (void)
29600 int pos = 0;
29601 int i;
29603 /* First allocate the local general purpose registers. */
29604 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29605 if (GENERAL_REGNO_P (i) && call_used_regs[i])
29606 reg_alloc_order [pos++] = i;
29608 /* Global general purpose registers. */
29609 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29610 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
29611 reg_alloc_order [pos++] = i;
29613 /* x87 registers come first in case we are doing FP math
29614 using them. */
29615 if (!TARGET_SSE_MATH)
29616 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29617 reg_alloc_order [pos++] = i;
29619 /* SSE registers. */
29620 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
29621 reg_alloc_order [pos++] = i;
29622 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
29623 reg_alloc_order [pos++] = i;
29625 /* x87 registers. */
29626 if (TARGET_SSE_MATH)
29627 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29628 reg_alloc_order [pos++] = i;
29630 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
29631 reg_alloc_order [pos++] = i;
29633 /* Initialize the rest of array as we do not allocate some registers
29634 at all. */
29635 while (pos < FIRST_PSEUDO_REGISTER)
29636 reg_alloc_order [pos++] = 0;
29639 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
29640 in struct attribute_spec handler. */
29641 static tree
29642 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
29643 tree args,
29644 int flags ATTRIBUTE_UNUSED,
29645 bool *no_add_attrs)
29647 if (TREE_CODE (*node) != FUNCTION_TYPE
29648 && TREE_CODE (*node) != METHOD_TYPE
29649 && TREE_CODE (*node) != FIELD_DECL
29650 && TREE_CODE (*node) != TYPE_DECL)
29652 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29653 name);
29654 *no_add_attrs = true;
29655 return NULL_TREE;
29657 if (TARGET_64BIT)
29659 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
29660 name);
29661 *no_add_attrs = true;
29662 return NULL_TREE;
29664 if (is_attribute_p ("callee_pop_aggregate_return", name))
29666 tree cst;
29668 cst = TREE_VALUE (args);
29669 if (TREE_CODE (cst) != INTEGER_CST)
29671 warning (OPT_Wattributes,
29672 "%qE attribute requires an integer constant argument",
29673 name);
29674 *no_add_attrs = true;
29676 else if (compare_tree_int (cst, 0) != 0
29677 && compare_tree_int (cst, 1) != 0)
29679 warning (OPT_Wattributes,
29680 "argument to %qE attribute is neither zero, nor one",
29681 name);
29682 *no_add_attrs = true;
29685 return NULL_TREE;
29688 return NULL_TREE;
29691 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
29692 struct attribute_spec.handler. */
29693 static tree
29694 ix86_handle_abi_attribute (tree *node, tree name,
29695 tree args ATTRIBUTE_UNUSED,
29696 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29698 if (TREE_CODE (*node) != FUNCTION_TYPE
29699 && TREE_CODE (*node) != METHOD_TYPE
29700 && TREE_CODE (*node) != FIELD_DECL
29701 && TREE_CODE (*node) != TYPE_DECL)
29703 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29704 name);
29705 *no_add_attrs = true;
29706 return NULL_TREE;
29708 if (!TARGET_64BIT)
29710 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
29711 name);
29712 *no_add_attrs = true;
29713 return NULL_TREE;
29716 /* Can combine regparm with all attributes but fastcall. */
29717 if (is_attribute_p ("ms_abi", name))
29719 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
29721 error ("ms_abi and sysv_abi attributes are not compatible");
29724 return NULL_TREE;
29726 else if (is_attribute_p ("sysv_abi", name))
29728 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
29730 error ("ms_abi and sysv_abi attributes are not compatible");
29733 return NULL_TREE;
29736 return NULL_TREE;
29739 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29740 struct attribute_spec.handler. */
29741 static tree
29742 ix86_handle_struct_attribute (tree *node, tree name,
29743 tree args ATTRIBUTE_UNUSED,
29744 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29746 tree *type = NULL;
29747 if (DECL_P (*node))
29749 if (TREE_CODE (*node) == TYPE_DECL)
29750 type = &TREE_TYPE (*node);
29752 else
29753 type = node;
29755 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29756 || TREE_CODE (*type) == UNION_TYPE)))
29758 warning (OPT_Wattributes, "%qE attribute ignored",
29759 name);
29760 *no_add_attrs = true;
29763 else if ((is_attribute_p ("ms_struct", name)
29764 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29765 || ((is_attribute_p ("gcc_struct", name)
29766 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29768 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29769 name);
29770 *no_add_attrs = true;
29773 return NULL_TREE;
29776 static tree
29777 ix86_handle_fndecl_attribute (tree *node, tree name,
29778 tree args ATTRIBUTE_UNUSED,
29779 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29781 if (TREE_CODE (*node) != FUNCTION_DECL)
29783 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29784 name);
29785 *no_add_attrs = true;
29787 return NULL_TREE;
29790 static bool
29791 ix86_ms_bitfield_layout_p (const_tree record_type)
29793 return ((TARGET_MS_BITFIELD_LAYOUT
29794 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29795 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
29798 /* Returns an expression indicating where the this parameter is
29799 located on entry to the FUNCTION. */
29801 static rtx
29802 x86_this_parameter (tree function)
29804 tree type = TREE_TYPE (function);
29805 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
29806 int nregs;
29808 if (TARGET_64BIT)
29810 const int *parm_regs;
29812 if (ix86_function_type_abi (type) == MS_ABI)
29813 parm_regs = x86_64_ms_abi_int_parameter_registers;
29814 else
29815 parm_regs = x86_64_int_parameter_registers;
29816 return gen_rtx_REG (DImode, parm_regs[aggr]);
29819 nregs = ix86_function_regparm (type, function);
29821 if (nregs > 0 && !stdarg_p (type))
29823 int regno;
29824 unsigned int ccvt = ix86_get_callcvt (type);
29826 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29827 regno = aggr ? DX_REG : CX_REG;
29828 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29830 regno = CX_REG;
29831 if (aggr)
29832 return gen_rtx_MEM (SImode,
29833 plus_constant (stack_pointer_rtx, 4));
29835 else
29837 regno = AX_REG;
29838 if (aggr)
29840 regno = DX_REG;
29841 if (nregs == 1)
29842 return gen_rtx_MEM (SImode,
29843 plus_constant (stack_pointer_rtx, 4));
29846 return gen_rtx_REG (SImode, regno);
29849 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
29852 /* Determine whether x86_output_mi_thunk can succeed. */
29854 static bool
29855 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
29856 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
29857 HOST_WIDE_INT vcall_offset, const_tree function)
29859 /* 64-bit can handle anything. */
29860 if (TARGET_64BIT)
29861 return true;
29863 /* For 32-bit, everything's fine if we have one free register. */
29864 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
29865 return true;
29867 /* Need a free register for vcall_offset. */
29868 if (vcall_offset)
29869 return false;
29871 /* Need a free register for GOT references. */
29872 if (flag_pic && !targetm.binds_local_p (function))
29873 return false;
29875 /* Otherwise ok. */
29876 return true;
29879 /* Output the assembler code for a thunk function. THUNK_DECL is the
29880 declaration for the thunk function itself, FUNCTION is the decl for
29881 the target function. DELTA is an immediate constant offset to be
29882 added to THIS. If VCALL_OFFSET is nonzero, the word at
29883 *(*this + vcall_offset) should be added to THIS. */
29885 static void
29886 x86_output_mi_thunk (FILE *file,
29887 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
29888 HOST_WIDE_INT vcall_offset, tree function)
29890 rtx xops[3];
29891 rtx this_param = x86_this_parameter (function);
29892 rtx this_reg, tmp;
29894 /* Make sure unwind info is emitted for the thunk if needed. */
29895 final_start_function (emit_barrier (), file, 1);
29897 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29898 pull it in now and let DELTA benefit. */
29899 if (REG_P (this_param))
29900 this_reg = this_param;
29901 else if (vcall_offset)
29903 /* Put the this parameter into %eax. */
29904 xops[0] = this_param;
29905 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
29906 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29908 else
29909 this_reg = NULL_RTX;
29911 /* Adjust the this parameter by a fixed constant. */
29912 if (delta)
29914 xops[0] = GEN_INT (delta);
29915 xops[1] = this_reg ? this_reg : this_param;
29916 if (TARGET_64BIT)
29918 if (!x86_64_general_operand (xops[0], DImode))
29920 tmp = gen_rtx_REG (DImode, R10_REG);
29921 xops[1] = tmp;
29922 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
29923 xops[0] = tmp;
29924 xops[1] = this_param;
29926 if (x86_maybe_negate_const_int (&xops[0], DImode))
29927 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
29928 else
29929 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
29931 else if (x86_maybe_negate_const_int (&xops[0], SImode))
29932 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
29933 else
29934 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
29937 /* Adjust the this parameter by a value stored in the vtable. */
29938 if (vcall_offset)
29940 if (TARGET_64BIT)
29941 tmp = gen_rtx_REG (DImode, R10_REG);
29942 else
29944 int tmp_regno = CX_REG;
29945 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
29946 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
29947 tmp_regno = AX_REG;
29948 tmp = gen_rtx_REG (SImode, tmp_regno);
29951 xops[0] = gen_rtx_MEM (Pmode, this_reg);
29952 xops[1] = tmp;
29953 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29955 /* Adjust the this parameter. */
29956 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
29957 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
29959 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
29960 xops[0] = GEN_INT (vcall_offset);
29961 xops[1] = tmp2;
29962 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
29963 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
29965 xops[1] = this_reg;
29966 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
29969 /* If necessary, drop THIS back to its stack slot. */
29970 if (this_reg && this_reg != this_param)
29972 xops[0] = this_reg;
29973 xops[1] = this_param;
29974 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29977 xops[0] = XEXP (DECL_RTL (function), 0);
29978 if (TARGET_64BIT)
29980 if (!flag_pic || targetm.binds_local_p (function)
29981 || DEFAULT_ABI == MS_ABI)
29982 output_asm_insn ("jmp\t%P0", xops);
29983 /* All thunks should be in the same object as their target,
29984 and thus binds_local_p should be true. */
29985 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
29986 gcc_unreachable ();
29987 else
29989 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
29990 tmp = gen_rtx_CONST (Pmode, tmp);
29991 tmp = gen_rtx_MEM (QImode, tmp);
29992 xops[0] = tmp;
29993 output_asm_insn ("jmp\t%A0", xops);
29996 else
29998 if (!flag_pic || targetm.binds_local_p (function))
29999 output_asm_insn ("jmp\t%P0", xops);
30000 else
30001 #if TARGET_MACHO
30002 if (TARGET_MACHO)
30004 rtx sym_ref = XEXP (DECL_RTL (function), 0);
30005 if (TARGET_MACHO_BRANCH_ISLANDS)
30006 sym_ref = (gen_rtx_SYMBOL_REF
30007 (Pmode,
30008 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
30009 tmp = gen_rtx_MEM (QImode, sym_ref);
30010 xops[0] = tmp;
30011 output_asm_insn ("jmp\t%0", xops);
30013 else
30014 #endif /* TARGET_MACHO */
30016 tmp = gen_rtx_REG (SImode, CX_REG);
30017 output_set_got (tmp, NULL_RTX);
30019 xops[1] = tmp;
30020 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
30021 output_asm_insn ("jmp\t{*}%1", xops);
30024 final_end_function ();
30027 static void
30028 x86_file_start (void)
30030 default_file_start ();
30031 #if TARGET_MACHO
30032 darwin_file_start ();
30033 #endif
30034 if (X86_FILE_START_VERSION_DIRECTIVE)
30035 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
30036 if (X86_FILE_START_FLTUSED)
30037 fputs ("\t.global\t__fltused\n", asm_out_file);
30038 if (ix86_asm_dialect == ASM_INTEL)
30039 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
30043 x86_field_alignment (tree field, int computed)
30045 enum machine_mode mode;
30046 tree type = TREE_TYPE (field);
30048 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
30049 return computed;
30050 mode = TYPE_MODE (strip_array_types (type));
30051 if (mode == DFmode || mode == DCmode
30052 || GET_MODE_CLASS (mode) == MODE_INT
30053 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
30054 return MIN (32, computed);
30055 return computed;
30058 /* Output assembler code to FILE to increment profiler label # LABELNO
30059 for profiling a function entry. */
30060 void
30061 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
30063 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
30064 : MCOUNT_NAME);
30066 if (TARGET_64BIT)
30068 #ifndef NO_PROFILE_COUNTERS
30069 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
30070 #endif
30072 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
30073 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
30074 else
30075 fprintf (file, "\tcall\t%s\n", mcount_name);
30077 else if (flag_pic)
30079 #ifndef NO_PROFILE_COUNTERS
30080 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
30081 LPREFIX, labelno);
30082 #endif
30083 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
30085 else
30087 #ifndef NO_PROFILE_COUNTERS
30088 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
30089 LPREFIX, labelno);
30090 #endif
30091 fprintf (file, "\tcall\t%s\n", mcount_name);
30095 /* We don't have exact information about the insn sizes, but we can safely
30096 assume that we know about all 1 byte insns and memory
30097 address sizes. This is enough to eliminate unnecessary padding in
30098 99% of cases. */
30100 static int
30101 min_insn_size (rtx insn)
30103 int l = 0, len;
30105 if (!INSN_P (insn) || !active_insn_p (insn))
30106 return 0;
30108 /* Discard alignments we've emitted and jump table data. */
30109 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
30110 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
30111 return 0;
30112 if (JUMP_TABLE_DATA_P (insn))
30113 return 0;
30115 /* Important case: calls are always 5 bytes.
30116 It is common to have many calls in a row. */
30117 if (CALL_P (insn)
30118 && symbolic_reference_mentioned_p (PATTERN (insn))
30119 && !SIBLING_CALL_P (insn))
30120 return 5;
30121 len = get_attr_length (insn);
30122 if (len <= 1)
30123 return 1;
30125 /* For normal instructions we rely on get_attr_length being exact,
30126 with a few exceptions. */
30127 if (!JUMP_P (insn))
30129 enum attr_type type = get_attr_type (insn);
30131 switch (type)
30133 case TYPE_MULTI:
30134 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
30135 || asm_noperands (PATTERN (insn)) >= 0)
30136 return 0;
30137 break;
30138 case TYPE_OTHER:
30139 case TYPE_FCMP:
30140 break;
30141 default:
30142 /* Otherwise trust get_attr_length. */
30143 return len;
30146 l = get_attr_length_address (insn);
30147 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
30148 l = 4;
30150 if (l)
30151 return 1+l;
30152 else
30153 return 2;
30156 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30158 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
30159 window. */
30161 static void
30162 ix86_avoid_jump_mispredicts (void)
30164 rtx insn, start = get_insns ();
30165 int nbytes = 0, njumps = 0;
30166 int isjump = 0;
30168 /* Look for all minimal intervals of instructions containing 4 jumps.
30169 The intervals are bounded by START and INSN.  NBYTES is the total
30170 size of the instructions in the interval, including INSN but not
30171 including START.  When NBYTES is smaller than 16 bytes, it is possible
30172 that the end of START and INSN end up in the same 16 byte window.
30174 The smallest offset in the window at which INSN can start occurs when
30175 START ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
30176 We add p2align to the 16 byte window with maxskip 15 - NBYTES + sizeof (INSN).
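For example, with NBYTES == 12 and min_insn_size (INSN) == 3, we emit
a pad of up to 15 - 12 + 3 == 6 bytes in front of INSN.  */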
30178 for (insn = start; insn; insn = NEXT_INSN (insn))
30180 int min_size;
30182 if (LABEL_P (insn))
30184 int align = label_to_alignment (insn);
30185 int max_skip = label_to_max_skip (insn);
30187 if (max_skip > 15)
30188 max_skip = 15;
30189 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
30190 already in the current 16 byte page, because otherwise
30191 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
30192 bytes to reach 16 byte boundary. */
30193 if (align <= 0
30194 || (align <= 3 && max_skip != (1 << align) - 1))
30195 max_skip = 0;
30196 if (dump_file)
30197 fprintf (dump_file, "Label %i with max_skip %i\n",
30198 INSN_UID (insn), max_skip);
30199 if (max_skip)
30201 while (nbytes + max_skip >= 16)
30203 start = NEXT_INSN (start);
30204 if ((JUMP_P (start)
30205 && GET_CODE (PATTERN (start)) != ADDR_VEC
30206 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
30207 || CALL_P (start))
30208 njumps--, isjump = 1;
30209 else
30210 isjump = 0;
30211 nbytes -= min_insn_size (start);
30214 continue;
30217 min_size = min_insn_size (insn);
30218 nbytes += min_size;
30219 if (dump_file)
30220 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
30221 INSN_UID (insn), min_size);
30222 if ((JUMP_P (insn)
30223 && GET_CODE (PATTERN (insn)) != ADDR_VEC
30224 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
30225 || CALL_P (insn))
30226 njumps++;
30227 else
30228 continue;
30230 while (njumps > 3)
30232 start = NEXT_INSN (start);
30233 if ((JUMP_P (start)
30234 && GET_CODE (PATTERN (start)) != ADDR_VEC
30235 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
30236 || CALL_P (start))
30237 njumps--, isjump = 1;
30238 else
30239 isjump = 0;
30240 nbytes -= min_insn_size (start);
30242 gcc_assert (njumps >= 0);
30243 if (dump_file)
30244 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
30245 INSN_UID (start), INSN_UID (insn), nbytes);
30247 if (njumps == 3 && isjump && nbytes < 16)
30249 int padsize = 15 - nbytes + min_insn_size (insn);
30251 if (dump_file)
30252 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
30253 INSN_UID (insn), padsize);
30254 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
30258 #endif
30260 /* AMD Athlon works faster
30261 when RET is not the destination of a conditional jump or directly preceded
30262 by another jump instruction.  We avoid the penalty by inserting a NOP just
30263 before the RET instructions in such cases. */
30264 static void
30265 ix86_pad_returns (void)
30267 edge e;
30268 edge_iterator ei;
30270 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30272 basic_block bb = e->src;
30273 rtx ret = BB_END (bb);
30274 rtx prev;
30275 bool replace = false;
30277 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
30278 || optimize_bb_for_size_p (bb))
30279 continue;
30280 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
30281 if (active_insn_p (prev) || LABEL_P (prev))
30282 break;
30283 if (prev && LABEL_P (prev))
30285 edge e;
30286 edge_iterator ei;
30288 FOR_EACH_EDGE (e, ei, bb->preds)
30289 if (EDGE_FREQUENCY (e) && e->src->index >= 0
30290 && !(e->flags & EDGE_FALLTHRU))
30291 replace = true;
30293 if (!replace)
30295 prev = prev_active_insn (ret);
30296 if (prev
30297 && ((JUMP_P (prev) && any_condjump_p (prev))
30298 || CALL_P (prev)))
30299 replace = true;
30300 /* Empty functions get a branch mispredict even when
30301 the jump destination is not visible to us. */
30302 if (!prev && !optimize_function_for_size_p (cfun))
30303 replace = true;
30305 if (replace)
30307 emit_jump_insn_before (gen_return_internal_long (), ret);
30308 delete_insn (ret);
30313 /* Count the minimum number of instructions in BB. Return 4 if the
30314 number of instructions >= 4. */
30316 static int
30317 ix86_count_insn_bb (basic_block bb)
30319 rtx insn;
30320 int insn_count = 0;
30322 /* Count number of instructions in this block. Return 4 if the number
30323 of instructions >= 4. */
30324 FOR_BB_INSNS (bb, insn)
30327 /* This only happens in exit blocks. */
30327 if (JUMP_P (insn)
30328 && GET_CODE (PATTERN (insn)) == RETURN)
30329 break;
30331 if (NONDEBUG_INSN_P (insn)
30332 && GET_CODE (PATTERN (insn)) != USE
30333 && GET_CODE (PATTERN (insn)) != CLOBBER)
30335 insn_count++;
30336 if (insn_count >= 4)
30337 return insn_count;
30341 return insn_count;
30345 /* Count the minimum number of instructions in a code path through BB.
30346 Return 4 if the number of instructions >= 4. */
30348 static int
30349 ix86_count_insn (basic_block bb)
30351 edge e;
30352 edge_iterator ei;
30353 int min_prev_count;
30355 /* Only bother counting instructions along paths with no
30356 more than 2 basic blocks between entry and exit. Given
30357 that BB has an edge to exit, determine if a predecessor
30358 of BB has an edge from entry. If so, compute the number
30359 of instructions in the predecessor block. If there
30360 happen to be multiple such blocks, compute the minimum. */
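/* For example, for ENTRY -> PRED -> BB -> EXIT this returns the minimum over
all such PREDs of (insns in PRED) + (insns in BB), with each block's count
capped at 4.  */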
30361 min_prev_count = 4;
30362 FOR_EACH_EDGE (e, ei, bb->preds)
30364 edge prev_e;
30365 edge_iterator prev_ei;
30367 if (e->src == ENTRY_BLOCK_PTR)
30369 min_prev_count = 0;
30370 break;
30372 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
30374 if (prev_e->src == ENTRY_BLOCK_PTR)
30376 int count = ix86_count_insn_bb (e->src);
30377 if (count < min_prev_count)
30378 min_prev_count = count;
30379 break;
30384 if (min_prev_count < 4)
30385 min_prev_count += ix86_count_insn_bb (bb);
30387 return min_prev_count;
30390 /* Pad short function to 4 instructions. */
30392 static void
30393 ix86_pad_short_function (void)
30395 edge e;
30396 edge_iterator ei;
30398 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30400 rtx ret = BB_END (e->src);
30401 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
30403 int insn_count = ix86_count_insn (e->src);
30405 /* Pad short function. */
30406 if (insn_count < 4)
30408 rtx insn = ret;
30410 /* Find epilogue. */
30411 while (insn
30412 && (!NOTE_P (insn)
30413 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
30414 insn = PREV_INSN (insn);
30416 if (!insn)
30417 insn = ret;
30419 /* Two NOPs count as one instruction. */
30420 insn_count = 2 * (4 - insn_count);
30421 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
30427 /* Implement machine specific optimizations. We implement padding of returns
30428 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
30429 static void
30430 ix86_reorg (void)
30432 /* We are freeing block_for_insn in the toplev to keep compatibility
30433 with old MDEP_REORGS that are not CFG based. Recompute it now. */
30434 compute_bb_for_insn ();
30436 /* Run the vzeroupper optimization if needed. */
30437 if (TARGET_VZEROUPPER)
30438 move_or_delete_vzeroupper ();
30440 if (optimize && optimize_function_for_speed_p (cfun))
30442 if (TARGET_PAD_SHORT_FUNCTION)
30443 ix86_pad_short_function ();
30444 else if (TARGET_PAD_RETURNS)
30445 ix86_pad_returns ();
30446 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30447 if (TARGET_FOUR_JUMP_LIMIT)
30448 ix86_avoid_jump_mispredicts ();
30449 #endif
30453 /* Return true when a QImode register that must be represented via a REX
30454 prefix is used. */
30455 bool
30456 x86_extended_QIreg_mentioned_p (rtx insn)
30458 int i;
30459 extract_insn_cached (insn);
30460 for (i = 0; i < recog_data.n_operands; i++)
30461 if (REG_P (recog_data.operand[i])
30462 && REGNO (recog_data.operand[i]) > BX_REG)
30463 return true;
30464 return false;
30467 /* Return nonzero when P points to a register encoded via a REX prefix.
30468 Called via for_each_rtx. */
30469 static int
30470 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
30472 unsigned int regno;
30473 if (!REG_P (*p))
30474 return 0;
30475 regno = REGNO (*p);
30476 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
30479 /* Return true when INSN mentions a register that must be encoded using a REX
30480 prefix. */
30481 bool
30482 x86_extended_reg_mentioned_p (rtx insn)
30484 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
30485 extended_reg_mentioned_1, NULL);
30488 /* If profitable, negate (without causing overflow) the integer constant
30489 of mode MODE at location LOC. Return true in this case. */
30490 bool
30491 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
30493 HOST_WIDE_INT val;
30495 if (!CONST_INT_P (*loc))
30496 return false;
30498 switch (mode)
30500 case DImode:
30501 /* DImode x86_64 constants must fit in 32 bits. */
30502 gcc_assert (x86_64_immediate_operand (*loc, mode));
30504 mode = SImode;
30505 break;
30507 case SImode:
30508 case HImode:
30509 case QImode:
30510 break;
30512 default:
30513 gcc_unreachable ();
30516 /* Avoid overflows. */
30517 if (mode_signbit_p (mode, *loc))
30518 return false;
30520 val = INTVAL (*loc);
30522 /* Make things pretty: emit `subl $4,%eax' rather than `addl $-4,%eax'.
30523 Exception: -128 encodes smaller than 128, so swap the sign and the operation. */
30524 if ((val < 0 && val != -128)
30525 || val == 128)
30527 *loc = GEN_INT (-val);
30528 return true;
30531 return false;
30534 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
30535 optabs would emit if we didn't have TFmode patterns. */
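/* For nonnegative inputs this is just a signed int-to-float conversion.
   For inputs with the sign bit set we instead convert
   (in >> 1) | (in & 1), which is nonnegative, and double the result;
   OR-ing the low bit back in keeps the final rounding correct.  */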
30537 void
30538 x86_emit_floatuns (rtx operands[2])
30540 rtx neglab, donelab, i0, i1, f0, in, out;
30541 enum machine_mode mode, inmode;
30543 inmode = GET_MODE (operands[1]);
30544 gcc_assert (inmode == SImode || inmode == DImode);
30546 out = operands[0];
30547 in = force_reg (inmode, operands[1]);
30548 mode = GET_MODE (out);
30549 neglab = gen_label_rtx ();
30550 donelab = gen_label_rtx ();
30551 f0 = gen_reg_rtx (mode);
30553 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
30555 expand_float (out, in, 0);
30557 emit_jump_insn (gen_jump (donelab));
30558 emit_barrier ();
30560 emit_label (neglab);
30562 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
30563 1, OPTAB_DIRECT);
30564 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
30565 1, OPTAB_DIRECT);
30566 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
30568 expand_float (f0, i0, 0);
30570 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
30572 emit_label (donelab);
30575 /* AVX does not support 32-byte integer vector operations,
30576 thus the longest vector we are faced with is V16QImode. */
30577 #define MAX_VECT_LEN 16
30579 struct expand_vec_perm_d
30581 rtx target, op0, op1;
30582 unsigned char perm[MAX_VECT_LEN];
30583 enum machine_mode vmode;
30584 unsigned char nelt;
30585 bool testing_p;
30588 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
30589 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
30591 /* Get a vector mode of the same size as the original but with elements
30592 twice as wide. This is only guaranteed to apply to integral vectors. */
30594 static inline enum machine_mode
30595 get_mode_wider_vector (enum machine_mode o)
30597 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
30598 enum machine_mode n = GET_MODE_WIDER_MODE (o);
30599 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
30600 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
30601 return n;
30604 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30605 with all elements equal to VAR. Return true if successful. */
30607 static bool
30608 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
30609 rtx target, rtx val)
30611 bool ok;
30613 switch (mode)
30615 case V2SImode:
30616 case V2SFmode:
30617 if (!mmx_ok)
30618 return false;
30619 /* FALLTHRU */
30621 case V4DFmode:
30622 case V4DImode:
30623 case V8SFmode:
30624 case V8SImode:
30625 case V2DFmode:
30626 case V2DImode:
30627 case V4SFmode:
30628 case V4SImode:
30630 rtx insn, dup;
30632 /* First attempt to recognize VAL as-is. */
30633 dup = gen_rtx_VEC_DUPLICATE (mode, val);
30634 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
30635 if (recog_memoized (insn) < 0)
30637 rtx seq;
30638 /* If that fails, force VAL into a register. */
30640 start_sequence ();
30641 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
30642 seq = get_insns ();
30643 end_sequence ();
30644 if (seq)
30645 emit_insn_before (seq, insn);
30647 ok = recog_memoized (insn) >= 0;
30648 gcc_assert (ok);
30651 return true;
30653 case V4HImode:
30654 if (!mmx_ok)
30655 return false;
30656 if (TARGET_SSE || TARGET_3DNOW_A)
30658 rtx x;
30660 val = gen_lowpart (SImode, val);
30661 x = gen_rtx_TRUNCATE (HImode, val);
30662 x = gen_rtx_VEC_DUPLICATE (mode, x);
30663 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30664 return true;
30666 goto widen;
30668 case V8QImode:
30669 if (!mmx_ok)
30670 return false;
30671 goto widen;
30673 case V8HImode:
30674 if (TARGET_SSE2)
30676 struct expand_vec_perm_d dperm;
30677 rtx tmp1, tmp2;
30679 permute:
30680 memset (&dperm, 0, sizeof (dperm));
30681 dperm.target = target;
30682 dperm.vmode = mode;
30683 dperm.nelt = GET_MODE_NUNITS (mode);
30684 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
30686 /* Extend to SImode using a paradoxical SUBREG. */
30687 tmp1 = gen_reg_rtx (SImode);
30688 emit_move_insn (tmp1, gen_lowpart (SImode, val));
30690 /* Insert the SImode value as low element of a V4SImode vector. */
30691 tmp2 = gen_lowpart (V4SImode, dperm.op0);
30692 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
30694 ok = (expand_vec_perm_1 (&dperm)
30695 || expand_vec_perm_broadcast_1 (&dperm));
30696 gcc_assert (ok);
30697 return ok;
30699 goto widen;
30701 case V16QImode:
30702 if (TARGET_SSE2)
30703 goto permute;
30704 goto widen;
30706 widen:
30707 /* Replicate the value once into the next wider mode and recurse. */
30709 enum machine_mode smode, wsmode, wvmode;
30710 rtx x;
30712 smode = GET_MODE_INNER (mode);
30713 wvmode = get_mode_wider_vector (mode);
30714 wsmode = GET_MODE_INNER (wvmode);
30716 val = convert_modes (wsmode, smode, val, true);
30717 x = expand_simple_binop (wsmode, ASHIFT, val,
30718 GEN_INT (GET_MODE_BITSIZE (smode)),
30719 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30720 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
30722 x = gen_lowpart (wvmode, target);
30723 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
30724 gcc_assert (ok);
30725 return ok;
30728 case V16HImode:
30729 case V32QImode:
30731 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
30732 rtx x = gen_reg_rtx (hvmode);
30734 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
30735 gcc_assert (ok);
30737 x = gen_rtx_VEC_CONCAT (mode, x, x);
30738 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30740 return true;
30742 default:
30743 return false;
30747 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30748 whose ONE_VAR element is VAR, and other elements are zero. Return true
30749 if successful. */
30751 static bool
30752 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
30753 rtx target, rtx var, int one_var)
30755 enum machine_mode vsimode;
30756 rtx new_target;
30757 rtx x, tmp;
30758 bool use_vector_set = false;
30760 switch (mode)
30762 case V2DImode:
30763 /* For SSE4.1, we normally use vector set. But if the second
30764 element is zero and inter-unit moves are OK, we use movq
30765 instead. */
30766 use_vector_set = (TARGET_64BIT
30767 && TARGET_SSE4_1
30768 && !(TARGET_INTER_UNIT_MOVES
30769 && one_var == 0));
30770 break;
30771 case V16QImode:
30772 case V4SImode:
30773 case V4SFmode:
30774 use_vector_set = TARGET_SSE4_1;
30775 break;
30776 case V8HImode:
30777 use_vector_set = TARGET_SSE2;
30778 break;
30779 case V4HImode:
30780 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
30781 break;
30782 case V32QImode:
30783 case V16HImode:
30784 case V8SImode:
30785 case V8SFmode:
30786 case V4DFmode:
30787 use_vector_set = TARGET_AVX;
30788 break;
30789 case V4DImode:
30790 /* Use ix86_expand_vector_set in 64bit mode only. */
30791 use_vector_set = TARGET_AVX && TARGET_64BIT;
30792 break;
30793 default:
30794 break;
30797 if (use_vector_set)
30799 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
30800 var = force_reg (GET_MODE_INNER (mode), var);
30801 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30802 return true;
30805 switch (mode)
30807 case V2SFmode:
30808 case V2SImode:
30809 if (!mmx_ok)
30810 return false;
30811 /* FALLTHRU */
30813 case V2DFmode:
30814 case V2DImode:
30815 if (one_var != 0)
30816 return false;
30817 var = force_reg (GET_MODE_INNER (mode), var);
30818 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
30819 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30820 return true;
30822 case V4SFmode:
30823 case V4SImode:
30824 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
30825 new_target = gen_reg_rtx (mode);
30826 else
30827 new_target = target;
30828 var = force_reg (GET_MODE_INNER (mode), var);
30829 x = gen_rtx_VEC_DUPLICATE (mode, var);
30830 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
30831 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
30832 if (one_var != 0)
30834 /* We need to shuffle the value to the correct position, so
30835 create a new pseudo to store the intermediate result. */
30837 /* With SSE2, we can use the integer shuffle insns. */
30838 if (mode != V4SFmode && TARGET_SSE2)
30840 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
30841 const1_rtx,
30842 GEN_INT (one_var == 1 ? 0 : 1),
30843 GEN_INT (one_var == 2 ? 0 : 1),
30844 GEN_INT (one_var == 3 ? 0 : 1)));
30845 if (target != new_target)
30846 emit_move_insn (target, new_target);
30847 return true;
30850 /* Otherwise convert the intermediate result to V4SFmode and
30851 use the SSE1 shuffle instructions. */
30852 if (mode != V4SFmode)
30854 tmp = gen_reg_rtx (V4SFmode);
30855 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
30857 else
30858 tmp = new_target;
30860 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
30861 const1_rtx,
30862 GEN_INT (one_var == 1 ? 0 : 1),
30863 GEN_INT (one_var == 2 ? 0+4 : 1+4),
30864 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
30866 if (mode != V4SFmode)
30867 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
30868 else if (tmp != target)
30869 emit_move_insn (target, tmp);
30871 else if (target != new_target)
30872 emit_move_insn (target, new_target);
30873 return true;
30875 case V8HImode:
30876 case V16QImode:
30877 vsimode = V4SImode;
30878 goto widen;
30879 case V4HImode:
30880 case V8QImode:
30881 if (!mmx_ok)
30882 return false;
30883 vsimode = V2SImode;
30884 goto widen;
30885 widen:
30886 if (one_var != 0)
30887 return false;
30889 /* Zero extend the variable element to SImode and recurse. */
30890 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
30892 x = gen_reg_rtx (vsimode);
30893 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
30894 var, one_var))
30895 gcc_unreachable ();
30897 emit_move_insn (target, gen_lowpart (mode, x));
30898 return true;
30900 default:
30901 return false;
30905 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30906 consisting of the values in VALS. It is known that all elements
30907 except ONE_VAR are constants. Return true if successful. */
30909 static bool
30910 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
30911 rtx target, rtx vals, int one_var)
30913 rtx var = XVECEXP (vals, 0, one_var);
30914 enum machine_mode wmode;
30915 rtx const_vec, x;
30917 const_vec = copy_rtx (vals);
30918 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
30919 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
30921 switch (mode)
30923 case V2DFmode:
30924 case V2DImode:
30925 case V2SFmode:
30926 case V2SImode:
30927 /* For the two element vectors, it's just as easy to use
30928 the general case. */
30929 return false;
30931 case V4DImode:
30932 /* Use ix86_expand_vector_set in 64bit mode only. */
30933 if (!TARGET_64BIT)
30934 return false;
30935 case V4DFmode:
30936 case V8SFmode:
30937 case V8SImode:
30938 case V16HImode:
30939 case V32QImode:
30940 case V4SFmode:
30941 case V4SImode:
30942 case V8HImode:
30943 case V4HImode:
30944 break;
30946 case V16QImode:
30947 if (TARGET_SSE4_1)
30948 break;
30949 wmode = V8HImode;
30950 goto widen;
30951 case V8QImode:
30952 wmode = V4HImode;
30953 goto widen;
30954 widen:
30955 /* There's no way to set one QImode entry easily. Combine
30956 the variable value with its adjacent constant value, and
30957 promote to an HImode set. */
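/* For example, with a V16QImode vector and ONE_VAR == 5 we build the
   HImode value (var << 8) | (vals[4] & 0xff) and store it as HImode
   element 2 of the V8HImode view of the vector.  */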
30958 x = XVECEXP (vals, 0, one_var ^ 1);
30959 if (one_var & 1)
30961 var = convert_modes (HImode, QImode, var, true);
30962 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
30963 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30964 x = GEN_INT (INTVAL (x) & 0xff);
30966 else
30968 var = convert_modes (HImode, QImode, var, true);
30969 x = gen_int_mode (INTVAL (x) << 8, HImode);
30971 if (x != const0_rtx)
30972 var = expand_simple_binop (HImode, IOR, var, x, var,
30973 1, OPTAB_LIB_WIDEN);
30975 x = gen_reg_rtx (wmode);
30976 emit_move_insn (x, gen_lowpart (wmode, const_vec));
30977 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
30979 emit_move_insn (target, gen_lowpart (mode, x));
30980 return true;
30982 default:
30983 return false;
30986 emit_move_insn (target, const_vec);
30987 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30988 return true;
30991 /* A subroutine of ix86_expand_vector_init_general. Use vector
30992 concatenate to handle the most general case: all values variable,
30993 and none identical. */
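/* For example, a V8SFmode vector is built from two V4SFmode halves,
   each of those from two V2SFmode pairs, and each pair is a VEC_CONCAT
   of two SFmode scalars.  */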
30995 static void
30996 ix86_expand_vector_init_concat (enum machine_mode mode,
30997 rtx target, rtx *ops, int n)
30999 enum machine_mode cmode, hmode = VOIDmode;
31000 rtx first[8], second[4];
31001 rtvec v;
31002 int i, j;
31004 switch (n)
31006 case 2:
31007 switch (mode)
31009 case V8SImode:
31010 cmode = V4SImode;
31011 break;
31012 case V8SFmode:
31013 cmode = V4SFmode;
31014 break;
31015 case V4DImode:
31016 cmode = V2DImode;
31017 break;
31018 case V4DFmode:
31019 cmode = V2DFmode;
31020 break;
31021 case V4SImode:
31022 cmode = V2SImode;
31023 break;
31024 case V4SFmode:
31025 cmode = V2SFmode;
31026 break;
31027 case V2DImode:
31028 cmode = DImode;
31029 break;
31030 case V2SImode:
31031 cmode = SImode;
31032 break;
31033 case V2DFmode:
31034 cmode = DFmode;
31035 break;
31036 case V2SFmode:
31037 cmode = SFmode;
31038 break;
31039 default:
31040 gcc_unreachable ();
31043 if (!register_operand (ops[1], cmode))
31044 ops[1] = force_reg (cmode, ops[1]);
31045 if (!register_operand (ops[0], cmode))
31046 ops[0] = force_reg (cmode, ops[0]);
31047 emit_insn (gen_rtx_SET (VOIDmode, target,
31048 gen_rtx_VEC_CONCAT (mode, ops[0],
31049 ops[1])));
31050 break;
31052 case 4:
31053 switch (mode)
31055 case V4DImode:
31056 cmode = V2DImode;
31057 break;
31058 case V4DFmode:
31059 cmode = V2DFmode;
31060 break;
31061 case V4SImode:
31062 cmode = V2SImode;
31063 break;
31064 case V4SFmode:
31065 cmode = V2SFmode;
31066 break;
31067 default:
31068 gcc_unreachable ();
31070 goto half;
31072 case 8:
31073 switch (mode)
31075 case V8SImode:
31076 cmode = V2SImode;
31077 hmode = V4SImode;
31078 break;
31079 case V8SFmode:
31080 cmode = V2SFmode;
31081 hmode = V4SFmode;
31082 break;
31083 default:
31084 gcc_unreachable ();
31086 goto half;
31088 half:
31089 /* FIXME: We process inputs backward to help RA. PR 36222. */
31090 i = n - 1;
31091 j = (n >> 1) - 1;
31092 for (; i > 0; i -= 2, j--)
31094 first[j] = gen_reg_rtx (cmode);
31095 v = gen_rtvec (2, ops[i - 1], ops[i]);
31096 ix86_expand_vector_init (false, first[j],
31097 gen_rtx_PARALLEL (cmode, v));
31100 n >>= 1;
31101 if (n > 2)
31103 gcc_assert (hmode != VOIDmode);
31104 for (i = j = 0; i < n; i += 2, j++)
31106 second[j] = gen_reg_rtx (hmode);
31107 ix86_expand_vector_init_concat (hmode, second [j],
31108 &first [i], 2);
31110 n >>= 1;
31111 ix86_expand_vector_init_concat (mode, target, second, n);
31113 else
31114 ix86_expand_vector_init_concat (mode, target, first, n);
31115 break;
31117 default:
31118 gcc_unreachable ();
31122 /* A subroutine of ix86_expand_vector_init_general. Use vector
31123 interleave to handle the most general case: all values variable,
31124 and none identical. */
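/* For example, for V8HImode elements e0..e7, each pair (e0,e1), (e2,e3),
   (e4,e5), (e6,e7) is first assembled in the low 32 bits of an SSE
   register; punpckldq-style interleaves then merge the pairs into quads
   and a punpcklqdq-style interleave merges the quads into the final
   vector.  */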
31126 static void
31127 ix86_expand_vector_init_interleave (enum machine_mode mode,
31128 rtx target, rtx *ops, int n)
31130 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
31131 int i, j;
31132 rtx op0, op1;
31133 rtx (*gen_load_even) (rtx, rtx, rtx);
31134 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
31135 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
31137 switch (mode)
31139 case V8HImode:
31140 gen_load_even = gen_vec_setv8hi;
31141 gen_interleave_first_low = gen_vec_interleave_lowv4si;
31142 gen_interleave_second_low = gen_vec_interleave_lowv2di;
31143 inner_mode = HImode;
31144 first_imode = V4SImode;
31145 second_imode = V2DImode;
31146 third_imode = VOIDmode;
31147 break;
31148 case V16QImode:
31149 gen_load_even = gen_vec_setv16qi;
31150 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
31151 gen_interleave_second_low = gen_vec_interleave_lowv4si;
31152 inner_mode = QImode;
31153 first_imode = V8HImode;
31154 second_imode = V4SImode;
31155 third_imode = V2DImode;
31156 break;
31157 default:
31158 gcc_unreachable ();
31161 for (i = 0; i < n; i++)
31163 /* Extend the odd element to SImode using a paradoxical SUBREG. */
31164 op0 = gen_reg_rtx (SImode);
31165 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
31167 /* Insert the SImode value as low element of V4SImode vector. */
31168 op1 = gen_reg_rtx (V4SImode);
31169 op0 = gen_rtx_VEC_MERGE (V4SImode,
31170 gen_rtx_VEC_DUPLICATE (V4SImode,
31171 op0),
31172 CONST0_RTX (V4SImode),
31173 const1_rtx);
31174 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
31176 /* Cast the V4SImode vector back to a vector in the original mode. */
31177 op0 = gen_reg_rtx (mode);
31178 emit_move_insn (op0, gen_lowpart (mode, op1));
31180 /* Load even elements into the second position. */
31181 emit_insn (gen_load_even (op0,
31182 force_reg (inner_mode,
31183 ops [i + i + 1]),
31184 const1_rtx));
31186 /* Cast vector to FIRST_IMODE vector. */
31187 ops[i] = gen_reg_rtx (first_imode);
31188 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
31191 /* Interleave low FIRST_IMODE vectors. */
31192 for (i = j = 0; i < n; i += 2, j++)
31194 op0 = gen_reg_rtx (first_imode);
31195 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
31197 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
31198 ops[j] = gen_reg_rtx (second_imode);
31199 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
31202 /* Interleave low SECOND_IMODE vectors. */
31203 switch (second_imode)
31205 case V4SImode:
31206 for (i = j = 0; i < n / 2; i += 2, j++)
31208 op0 = gen_reg_rtx (second_imode);
31209 emit_insn (gen_interleave_second_low (op0, ops[i],
31210 ops[i + 1]));
31212 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
31213 vector. */
31214 ops[j] = gen_reg_rtx (third_imode);
31215 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
31217 second_imode = V2DImode;
31218 gen_interleave_second_low = gen_vec_interleave_lowv2di;
31219 /* FALLTHRU */
31221 case V2DImode:
31222 op0 = gen_reg_rtx (second_imode);
31223 emit_insn (gen_interleave_second_low (op0, ops[0],
31224 ops[1]));
31226 /* Cast the SECOND_IMODE vector back to a vector in the original
31227 mode. */
31228 emit_insn (gen_rtx_SET (VOIDmode, target,
31229 gen_lowpart (mode, op0)));
31230 break;
31232 default:
31233 gcc_unreachable ();
31237 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
31238 all values variable, and none identical. */
31240 static void
31241 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
31242 rtx target, rtx vals)
31244 rtx ops[32], op0, op1;
31245 enum machine_mode half_mode = VOIDmode;
31246 int n, i;
31248 switch (mode)
31250 case V2SFmode:
31251 case V2SImode:
31252 if (!mmx_ok && !TARGET_SSE)
31253 break;
31254 /* FALLTHRU */
31256 case V8SFmode:
31257 case V8SImode:
31258 case V4DFmode:
31259 case V4DImode:
31260 case V4SFmode:
31261 case V4SImode:
31262 case V2DFmode:
31263 case V2DImode:
31264 n = GET_MODE_NUNITS (mode);
31265 for (i = 0; i < n; i++)
31266 ops[i] = XVECEXP (vals, 0, i);
31267 ix86_expand_vector_init_concat (mode, target, ops, n);
31268 return;
31270 case V32QImode:
31271 half_mode = V16QImode;
31272 goto half;
31274 case V16HImode:
31275 half_mode = V8HImode;
31276 goto half;
31278 half:
31279 n = GET_MODE_NUNITS (mode);
31280 for (i = 0; i < n; i++)
31281 ops[i] = XVECEXP (vals, 0, i);
31282 op0 = gen_reg_rtx (half_mode);
31283 op1 = gen_reg_rtx (half_mode);
31284 ix86_expand_vector_init_interleave (half_mode, op0, ops,
31285 n >> 2);
31286 ix86_expand_vector_init_interleave (half_mode, op1,
31287 &ops [n >> 1], n >> 2);
31288 emit_insn (gen_rtx_SET (VOIDmode, target,
31289 gen_rtx_VEC_CONCAT (mode, op0, op1)));
31290 return;
31292 case V16QImode:
31293 if (!TARGET_SSE4_1)
31294 break;
31295 /* FALLTHRU */
31297 case V8HImode:
31298 if (!TARGET_SSE2)
31299 break;
31301 /* Don't use ix86_expand_vector_init_interleave if we can't
31302 move from GPR to SSE register directly. */
31303 if (!TARGET_INTER_UNIT_MOVES)
31304 break;
31306 n = GET_MODE_NUNITS (mode);
31307 for (i = 0; i < n; i++)
31308 ops[i] = XVECEXP (vals, 0, i);
31309 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
31310 return;
31312 case V4HImode:
31313 case V8QImode:
31314 break;
31316 default:
31317 gcc_unreachable ();
31321 int i, j, n_elts, n_words, n_elt_per_word;
31322 enum machine_mode inner_mode;
31323 rtx words[4], shift;
31325 inner_mode = GET_MODE_INNER (mode);
31326 n_elts = GET_MODE_NUNITS (mode);
31327 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
31328 n_elt_per_word = n_elts / n_words;
31329 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
31331 for (i = 0; i < n_words; ++i)
31333 rtx word = NULL_RTX;
31335 for (j = 0; j < n_elt_per_word; ++j)
31337 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
31338 elt = convert_modes (word_mode, inner_mode, elt, true);
31340 if (j == 0)
31341 word = elt;
31342 else
31344 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
31345 word, 1, OPTAB_LIB_WIDEN);
31346 word = expand_simple_binop (word_mode, IOR, word, elt,
31347 word, 1, OPTAB_LIB_WIDEN);
31351 words[i] = word;
31354 if (n_words == 1)
31355 emit_move_insn (target, gen_lowpart (mode, words[0]));
31356 else if (n_words == 2)
31358 rtx tmp = gen_reg_rtx (mode);
31359 emit_clobber (tmp);
31360 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
31361 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
31362 emit_move_insn (target, tmp);
31364 else if (n_words == 4)
31366 rtx tmp = gen_reg_rtx (V4SImode);
31367 gcc_assert (word_mode == SImode);
31368 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
31369 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
31370 emit_move_insn (target, gen_lowpart (mode, tmp));
31372 else
31373 gcc_unreachable ();
31377 /* Initialize vector TARGET via VALS. Suppress the use of MMX
31378 instructions unless MMX_OK is true. */
31380 void
31381 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
31383 enum machine_mode mode = GET_MODE (target);
31384 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31385 int n_elts = GET_MODE_NUNITS (mode);
31386 int n_var = 0, one_var = -1;
31387 bool all_same = true, all_const_zero = true;
31388 int i;
31389 rtx x;
31391 for (i = 0; i < n_elts; ++i)
31393 x = XVECEXP (vals, 0, i);
31394 if (!(CONST_INT_P (x)
31395 || GET_CODE (x) == CONST_DOUBLE
31396 || GET_CODE (x) == CONST_FIXED))
31397 n_var++, one_var = i;
31398 else if (x != CONST0_RTX (inner_mode))
31399 all_const_zero = false;
31400 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
31401 all_same = false;
31404 /* Constants are best loaded from the constant pool. */
31405 if (n_var == 0)
31407 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
31408 return;
31411 /* If all values are identical, broadcast the value. */
31412 if (all_same
31413 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
31414 XVECEXP (vals, 0, 0)))
31415 return;
31417 /* Values where only one field is non-constant are best loaded from
31418 the pool and overwritten via move later. */
31419 if (n_var == 1)
31421 if (all_const_zero
31422 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
31423 XVECEXP (vals, 0, one_var),
31424 one_var))
31425 return;
31427 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
31428 return;
31431 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
31434 void
31435 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
31437 enum machine_mode mode = GET_MODE (target);
31438 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31439 enum machine_mode half_mode;
31440 bool use_vec_merge = false;
31441 rtx tmp;
31442 static rtx (*gen_extract[6][2]) (rtx, rtx)
31444 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
31445 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
31446 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
31447 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
31448 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
31449 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
31451 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
31453 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
31454 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
31455 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
31456 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
31457 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
31458 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
31460 int i, j, n;
31462 switch (mode)
31464 case V2SFmode:
31465 case V2SImode:
31466 if (mmx_ok)
31468 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31469 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
31470 if (elt == 0)
31471 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31472 else
31473 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31474 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31475 return;
31477 break;
31479 case V2DImode:
31480 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
31481 if (use_vec_merge)
31482 break;
31484 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31485 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
31486 if (elt == 0)
31487 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31488 else
31489 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31490 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31491 return;
31493 case V2DFmode:
31495 rtx op0, op1;
31497 /* For the two element vectors, we implement a VEC_CONCAT with
31498 the extraction of the other element. */
31500 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
31501 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
31503 if (elt == 0)
31504 op0 = val, op1 = tmp;
31505 else
31506 op0 = tmp, op1 = val;
31508 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
31509 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31511 return;
31513 case V4SFmode:
31514 use_vec_merge = TARGET_SSE4_1;
31515 if (use_vec_merge)
31516 break;
31518 switch (elt)
31520 case 0:
31521 use_vec_merge = true;
31522 break;
31524 case 1:
31525 /* tmp = target = A B C D */
31526 tmp = copy_to_reg (target);
31527 /* target = A A B B */
31528 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
31529 /* target = X A B B */
31530 ix86_expand_vector_set (false, target, val, 0);
31531 /* target = A X C D */
31532 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31533 const1_rtx, const0_rtx,
31534 GEN_INT (2+4), GEN_INT (3+4)));
31535 return;
31537 case 2:
31538 /* tmp = target = A B C D */
31539 tmp = copy_to_reg (target);
31540 /* tmp = X B C D */
31541 ix86_expand_vector_set (false, tmp, val, 0);
31542 /* target = A B X D */
31543 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31544 const0_rtx, const1_rtx,
31545 GEN_INT (0+4), GEN_INT (3+4)));
31546 return;
31548 case 3:
31549 /* tmp = target = A B C D */
31550 tmp = copy_to_reg (target);
31551 /* tmp = X B C D */
31552 ix86_expand_vector_set (false, tmp, val, 0);
31553 /* target = A B C X */
31554 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31555 const0_rtx, const1_rtx,
31556 GEN_INT (2+4), GEN_INT (0+4)));
31557 return;
31559 default:
31560 gcc_unreachable ();
31562 break;
31564 case V4SImode:
31565 use_vec_merge = TARGET_SSE4_1;
31566 if (use_vec_merge)
31567 break;
31569 /* Element 0 handled by vec_merge below. */
31570 if (elt == 0)
31572 use_vec_merge = true;
31573 break;
31576 if (TARGET_SSE2)
31578 /* With SSE2, use integer shuffles to swap element 0 and ELT,
31579 store into element 0, then shuffle them back. */
31581 rtx order[4];
31583 order[0] = GEN_INT (elt);
31584 order[1] = const1_rtx;
31585 order[2] = const2_rtx;
31586 order[3] = GEN_INT (3);
31587 order[elt] = const0_rtx;
31589 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31590 order[1], order[2], order[3]));
31592 ix86_expand_vector_set (false, target, val, 0);
31594 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31595 order[1], order[2], order[3]));
31597 else
31599 /* For SSE1, we have to reuse the V4SF code. */
31600 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
31601 gen_lowpart (SFmode, val), elt);
31603 return;
31605 case V8HImode:
31606 use_vec_merge = TARGET_SSE2;
31607 break;
31608 case V4HImode:
31609 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31610 break;
31612 case V16QImode:
31613 use_vec_merge = TARGET_SSE4_1;
31614 break;
31616 case V8QImode:
31617 break;
31619 case V32QImode:
31620 half_mode = V16QImode;
31621 j = 0;
31622 n = 16;
31623 goto half;
31625 case V16HImode:
31626 half_mode = V8HImode;
31627 j = 1;
31628 n = 8;
31629 goto half;
31631 case V8SImode:
31632 half_mode = V4SImode;
31633 j = 2;
31634 n = 4;
31635 goto half;
31637 case V4DImode:
31638 half_mode = V2DImode;
31639 j = 3;
31640 n = 2;
31641 goto half;
31643 case V8SFmode:
31644 half_mode = V4SFmode;
31645 j = 4;
31646 n = 4;
31647 goto half;
31649 case V4DFmode:
31650 half_mode = V2DFmode;
31651 j = 5;
31652 n = 2;
31653 goto half;
31655 half:
31656 /* Compute offset. */
31657 i = elt / n;
31658 elt %= n;
31660 gcc_assert (i <= 1);
31662 /* Extract the half. */
31663 tmp = gen_reg_rtx (half_mode);
31664 emit_insn (gen_extract[j][i] (tmp, target));
31666 /* Put val in tmp at elt. */
31667 ix86_expand_vector_set (false, tmp, val, elt);
31669 /* Put it back. */
31670 emit_insn (gen_insert[j][i] (target, target, tmp));
31671 return;
31673 default:
31674 break;
31677 if (use_vec_merge)
31679 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
31680 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
31681 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31683 else
31685 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31687 emit_move_insn (mem, target);
31689 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31690 emit_move_insn (tmp, val);
31692 emit_move_insn (target, mem);
31696 void
31697 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
31699 enum machine_mode mode = GET_MODE (vec);
31700 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31701 bool use_vec_extr = false;
31702 rtx tmp;
31704 switch (mode)
31706 case V2SImode:
31707 case V2SFmode:
31708 if (!mmx_ok)
31709 break;
31710 /* FALLTHRU */
31712 case V2DFmode:
31713 case V2DImode:
31714 use_vec_extr = true;
31715 break;
31717 case V4SFmode:
31718 use_vec_extr = TARGET_SSE4_1;
31719 if (use_vec_extr)
31720 break;
31722 switch (elt)
31724 case 0:
31725 tmp = vec;
31726 break;
31728 case 1:
31729 case 3:
31730 tmp = gen_reg_rtx (mode);
31731 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
31732 GEN_INT (elt), GEN_INT (elt),
31733 GEN_INT (elt+4), GEN_INT (elt+4)));
31734 break;
31736 case 2:
31737 tmp = gen_reg_rtx (mode);
31738 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
31739 break;
31741 default:
31742 gcc_unreachable ();
31744 vec = tmp;
31745 use_vec_extr = true;
31746 elt = 0;
31747 break;
31749 case V4SImode:
31750 use_vec_extr = TARGET_SSE4_1;
31751 if (use_vec_extr)
31752 break;
31754 if (TARGET_SSE2)
31756 switch (elt)
31758 case 0:
31759 tmp = vec;
31760 break;
31762 case 1:
31763 case 3:
31764 tmp = gen_reg_rtx (mode);
31765 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
31766 GEN_INT (elt), GEN_INT (elt),
31767 GEN_INT (elt), GEN_INT (elt)));
31768 break;
31770 case 2:
31771 tmp = gen_reg_rtx (mode);
31772 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
31773 break;
31775 default:
31776 gcc_unreachable ();
31778 vec = tmp;
31779 use_vec_extr = true;
31780 elt = 0;
31782 else
31784 /* For SSE1, we have to reuse the V4SF code. */
31785 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
31786 gen_lowpart (V4SFmode, vec), elt);
31787 return;
31789 break;
31791 case V8HImode:
31792 use_vec_extr = TARGET_SSE2;
31793 break;
31794 case V4HImode:
31795 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31796 break;
31798 case V16QImode:
31799 use_vec_extr = TARGET_SSE4_1;
31800 break;
31802 case V8QImode:
31803 /* ??? Could extract the appropriate HImode element and shift. */
31804 default:
31805 break;
31808 if (use_vec_extr)
31810 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
31811 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
31813 /* Let the rtl optimizers know about the zero extension performed. */
31814 if (inner_mode == QImode || inner_mode == HImode)
31816 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
31817 target = gen_lowpart (SImode, target);
31820 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31822 else
31824 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31826 emit_move_insn (mem, vec);
31828 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31829 emit_move_insn (target, tmp);
31833 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
31834 pattern to reduce; DEST is the destination; IN is the input vector. */
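/* For example, with FN generating addv4sf3, element 0 of DEST ends up
   holding (IN[0] + IN[2]) + (IN[1] + IN[3]).  */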
31836 void
31837 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
31839 rtx tmp1, tmp2, tmp3;
31841 tmp1 = gen_reg_rtx (V4SFmode);
31842 tmp2 = gen_reg_rtx (V4SFmode);
31843 tmp3 = gen_reg_rtx (V4SFmode);
31845 emit_insn (gen_sse_movhlps (tmp1, in, in));
31846 emit_insn (fn (tmp2, tmp1, in));
31848 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
31849 const1_rtx, const1_rtx,
31850 GEN_INT (1+4), GEN_INT (1+4)));
31851 emit_insn (fn (dest, tmp2, tmp3));
31854 /* Target hook for scalar_mode_supported_p. */
31855 static bool
31856 ix86_scalar_mode_supported_p (enum machine_mode mode)
31858 if (DECIMAL_FLOAT_MODE_P (mode))
31859 return default_decimal_float_supported_p ();
31860 else if (mode == TFmode)
31861 return true;
31862 else
31863 return default_scalar_mode_supported_p (mode);
31866 /* Implements target hook vector_mode_supported_p. */
31867 static bool
31868 ix86_vector_mode_supported_p (enum machine_mode mode)
31870 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
31871 return true;
31872 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
31873 return true;
31874 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
31875 return true;
31876 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
31877 return true;
31878 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
31879 return true;
31880 return false;
31883 /* Target hook for c_mode_for_suffix. */
31884 static enum machine_mode
31885 ix86_c_mode_for_suffix (char suffix)
31887 if (suffix == 'q')
31888 return TFmode;
31889 if (suffix == 'w')
31890 return XFmode;
31892 return VOIDmode;
31895 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31897 We do this in the new i386 backend to maintain source compatibility
31898 with the old cc0-based compiler. */
31900 static tree
31901 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
31902 tree inputs ATTRIBUTE_UNUSED,
31903 tree clobbers)
31905 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
31906 clobbers);
31907 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
31908 clobbers);
31909 return clobbers;
31912 /* Implements target vector targetm.asm.encode_section_info. This
31913 is not used by NetWare. */
31915 static void ATTRIBUTE_UNUSED
31916 ix86_encode_section_info (tree decl, rtx rtl, int first)
31918 default_encode_section_info (decl, rtl, first);
31920 if (TREE_CODE (decl) == VAR_DECL
31921 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
31922 && ix86_in_large_data_p (decl))
31923 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
31926 /* Worker function for REVERSE_CONDITION. */
31928 enum rtx_code
31929 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
31931 return (mode != CCFPmode && mode != CCFPUmode
31932 ? reverse_condition (code)
31933 : reverse_condition_maybe_unordered (code));
31936 /* Output code to perform an x87 FP register move, from OPERANDS[1]
31937 to OPERANDS[0]. */
31939 const char *
31940 output_387_reg_move (rtx insn, rtx *operands)
31942 if (REG_P (operands[0]))
31944 if (REG_P (operands[1])
31945 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31947 if (REGNO (operands[0]) == FIRST_STACK_REG)
31948 return output_387_ffreep (operands, 0);
31949 return "fstp\t%y0";
31951 if (STACK_TOP_P (operands[0]))
31952 return "fld%Z1\t%y1";
31953 return "fst\t%y0";
31955 else if (MEM_P (operands[0]))
31957 gcc_assert (REG_P (operands[1]));
31958 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31959 return "fstp%Z0\t%y0";
31960 else
31962 /* There is no non-popping store to memory for XFmode.
31963 So if we need one, follow the store with a load. */
31964 if (GET_MODE (operands[0]) == XFmode)
31965 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31966 else
31967 return "fst%Z0\t%y0";
31970 else
31971 gcc_unreachable();
31974 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
31975 the FP status register is set. */
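/* C2 is bit 10 of the FPU status word, i.e. bit 2 (mask 0x04) of %ah
   after fnstsw; the sahf path copies it into PF, which the UNORDERED
   condition below tests.  */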
31977 void
31978 ix86_emit_fp_unordered_jump (rtx label)
31980 rtx reg = gen_reg_rtx (HImode);
31981 rtx temp;
31983 emit_insn (gen_x86_fnstsw_1 (reg));
31985 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
31987 emit_insn (gen_x86_sahf_1 (reg));
31989 temp = gen_rtx_REG (CCmode, FLAGS_REG);
31990 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
31992 else
31994 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
31996 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
31997 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
32000 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
32001 gen_rtx_LABEL_REF (VOIDmode, label),
32002 pc_rtx);
32003 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
32005 emit_jump_insn (temp);
32006 predict_jump (REG_BR_PROB_BASE * 10 / 100);
32009 /* Output code to perform a log1p XFmode calculation. */
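/* The threshold 0.29289... is 1 - sqrt(2)/2, the documented argument limit
   of fyl2xp1: for |op1| below it we use fyl2xp1 directly (accurate for
   small arguments), otherwise we form 1 + op1 explicitly and use fyl2x.
   With y = ln(2) both compute ln(2) * log2(1 + op1), i.e. log1p(op1).  */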
32011 void ix86_emit_i387_log1p (rtx op0, rtx op1)
32013 rtx label1 = gen_label_rtx ();
32014 rtx label2 = gen_label_rtx ();
32016 rtx tmp = gen_reg_rtx (XFmode);
32017 rtx tmp2 = gen_reg_rtx (XFmode);
32018 rtx test;
32020 emit_insn (gen_absxf2 (tmp, op1));
32021 test = gen_rtx_GE (VOIDmode, tmp,
32022 CONST_DOUBLE_FROM_REAL_VALUE (
32023 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
32024 XFmode));
32025 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
32027 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
32028 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
32029 emit_jump (label2);
32031 emit_label (label1);
32032 emit_move_insn (tmp, CONST1_RTX (XFmode));
32033 emit_insn (gen_addxf3 (tmp, op1, tmp));
32034 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
32035 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
32037 emit_label (label2);
32040 /* Output code to perform a Newton-Raphson approximation of a single precision
32041 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
32043 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
32045 rtx x0, x1, e0, e1;
32047 x0 = gen_reg_rtx (mode);
32048 e0 = gen_reg_rtx (mode);
32049 e1 = gen_reg_rtx (mode);
32050 x1 = gen_reg_rtx (mode);
32052 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
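/* This is one Newton-Raphson step x1 = x0 * (2 - b * x0) for
   f(x) = 1/x - b, applied to the rcp estimate x0; each step roughly
   doubles the number of correct bits.  */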
32054 /* x0 = rcp(b) estimate */
32055 emit_insn (gen_rtx_SET (VOIDmode, x0,
32056 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
32057 UNSPEC_RCP)));
32058 /* e0 = x0 * b */
32059 emit_insn (gen_rtx_SET (VOIDmode, e0,
32060 gen_rtx_MULT (mode, x0, b)));
32062 /* e0 = x0 * e0 */
32063 emit_insn (gen_rtx_SET (VOIDmode, e0,
32064 gen_rtx_MULT (mode, x0, e0)));
32066 /* e1 = x0 + x0 */
32067 emit_insn (gen_rtx_SET (VOIDmode, e1,
32068 gen_rtx_PLUS (mode, x0, x0)));
32070 /* x1 = e1 - e0 */
32071 emit_insn (gen_rtx_SET (VOIDmode, x1,
32072 gen_rtx_MINUS (mode, e1, e0)));
32074 /* res = a * x1 */
32075 emit_insn (gen_rtx_SET (VOIDmode, res,
32076 gen_rtx_MULT (mode, a, x1)));
32079 /* Output code to perform a Newton-Raphson approximation of a
32080 single precision floating point [reciprocal] square root. */
32082 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
32083 bool recip)
32085 rtx x0, e0, e1, e2, e3, mthree, mhalf;
32086 REAL_VALUE_TYPE r;
32088 x0 = gen_reg_rtx (mode);
32089 e0 = gen_reg_rtx (mode);
32090 e1 = gen_reg_rtx (mode);
32091 e2 = gen_reg_rtx (mode);
32092 e3 = gen_reg_rtx (mode);
32094 real_from_integer (&r, VOIDmode, -3, -1, 0);
32095 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
32097 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
32098 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
32100 if (VECTOR_MODE_P (mode))
32102 mthree = ix86_build_const_vector (mode, true, mthree);
32103 mhalf = ix86_build_const_vector (mode, true, mhalf);
32106 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
32107 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
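/* These are one Newton-Raphson step x1 = 0.5 * x0 * (3 - a * x0 * x0) for
   f(x) = 1/(x*x) - a, applied to the rsqrt estimate x0; the sqrt variant
   additionally multiplies by a, using sqrt(a) = a * rsqrt(a).  */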
32109 /* x0 = rsqrt(a) estimate */
32110 emit_insn (gen_rtx_SET (VOIDmode, x0,
32111 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
32112 UNSPEC_RSQRT)));
32114 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent a NaN result for sqrt(0.0). */
32115 if (!recip)
32117 rtx zero, mask;
32119 zero = gen_reg_rtx (mode);
32120 mask = gen_reg_rtx (mode);
32122 zero = force_reg (mode, CONST0_RTX(mode));
32123 emit_insn (gen_rtx_SET (VOIDmode, mask,
32124 gen_rtx_NE (mode, zero, a)));
32126 emit_insn (gen_rtx_SET (VOIDmode, x0,
32127 gen_rtx_AND (mode, x0, mask)));
32130 /* e0 = x0 * a */
32131 emit_insn (gen_rtx_SET (VOIDmode, e0,
32132 gen_rtx_MULT (mode, x0, a)));
32133 /* e1 = e0 * x0 */
32134 emit_insn (gen_rtx_SET (VOIDmode, e1,
32135 gen_rtx_MULT (mode, e0, x0)));
32137 /* e2 = e1 - 3. */
32138 mthree = force_reg (mode, mthree);
32139 emit_insn (gen_rtx_SET (VOIDmode, e2,
32140 gen_rtx_PLUS (mode, e1, mthree)));
32142 mhalf = force_reg (mode, mhalf);
32143 if (recip)
32144 /* e3 = -.5 * x0 */
32145 emit_insn (gen_rtx_SET (VOIDmode, e3,
32146 gen_rtx_MULT (mode, x0, mhalf)));
32147 else
32148 /* e3 = -.5 * e0 */
32149 emit_insn (gen_rtx_SET (VOIDmode, e3,
32150 gen_rtx_MULT (mode, e0, mhalf)));
32151 /* ret = e2 * e3 */
32152 emit_insn (gen_rtx_SET (VOIDmode, res,
32153 gen_rtx_MULT (mode, e2, e3)));
32156 #ifdef TARGET_SOLARIS
32157 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
32159 static void
32160 i386_solaris_elf_named_section (const char *name, unsigned int flags,
32161 tree decl)
32163 /* With Binutils 2.15, the "@unwind" marker must be specified on
32164 every occurrence of the ".eh_frame" section, not just the first
32165 one. */
32166 if (TARGET_64BIT
32167 && strcmp (name, ".eh_frame") == 0)
32169 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
32170 flags & SECTION_WRITE ? "aw" : "a");
32171 return;
32174 #ifndef USE_GAS
32175 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
32177 solaris_elf_asm_comdat_section (name, flags, decl);
32178 return;
32180 #endif
32182 default_elf_asm_named_section (name, flags, decl);
32184 #endif /* TARGET_SOLARIS */
32186 /* Return the mangling of TYPE if it is an extended fundamental type. */
32188 static const char *
32189 ix86_mangle_type (const_tree type)
32191 type = TYPE_MAIN_VARIANT (type);
32193 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
32194 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
32195 return NULL;
32197 switch (TYPE_MODE (type))
32199 case TFmode:
32200 /* __float128 is "g". */
32201 return "g";
32202 case XFmode:
32203 /* "long double" or __float80 is "e". */
32204 return "e";
32205 default:
32206 return NULL;
32210 /* For 32-bit code we can save PIC register setup by using
32211 __stack_chk_fail_local hidden function instead of calling
32212    __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
32213 register, so it is better to call __stack_chk_fail directly. */
32215 static tree
32216 ix86_stack_protect_fail (void)
32218 return TARGET_64BIT
32219 ? default_external_stack_protect_fail ()
32220 : default_hidden_stack_protect_fail ();
32223 /* Select a format to encode pointers in exception handling data. CODE
32224 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
32225 true if the symbol may be affected by dynamic relocations.
32227 ??? All x86 object file formats are capable of representing this.
32228 After all, the relocation needed is the same as for the call insn.
32229 Whether or not a particular assembler allows us to enter such, I
32230 guess we'll have to see. */
32232 asm_preferred_eh_data_format (int code, int global)
32234 if (flag_pic)
32236 int type = DW_EH_PE_sdata8;
32237 if (!TARGET_64BIT
32238 || ix86_cmodel == CM_SMALL_PIC
32239 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
32240 type = DW_EH_PE_sdata4;
32241 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
32243 if (ix86_cmodel == CM_SMALL
32244 || (ix86_cmodel == CM_MEDIUM && code))
32245 return DW_EH_PE_udata4;
32246 return DW_EH_PE_absptr;
32249 /* Expand copysign from SIGN to the positive value ABS_VALUE
32250    storing in RESULT.  If MASK is non-null, it shall be a mask that clears
32251    the sign bit.  */
32252 static void
32253 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
32255 enum machine_mode mode = GET_MODE (sign);
32256 rtx sgn = gen_reg_rtx (mode);
32257 if (mask == NULL_RTX)
32259 enum machine_mode vmode;
32261 if (mode == SFmode)
32262 vmode = V4SFmode;
32263 else if (mode == DFmode)
32264 vmode = V2DFmode;
32265 else
32266 vmode = mode;
32268 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
32269 if (!VECTOR_MODE_P (mode))
32271 /* We need to generate a scalar mode mask in this case. */
32272 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
32273 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
32274 mask = gen_reg_rtx (mode);
32275 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
32278 else
32279 mask = gen_rtx_NOT (mode, mask);
32280 emit_insn (gen_rtx_SET (VOIDmode, sgn,
32281 gen_rtx_AND (mode, mask, sign)));
32282 emit_insn (gen_rtx_SET (VOIDmode, result,
32283 gen_rtx_IOR (mode, abs_value, sgn)));
32286 /* Expand fabs (OP0) and return a new rtx that holds the result. The
32287 mask for masking out the sign-bit is stored in *SMASK, if that is
32288 non-null. */
32289 static rtx
32290 ix86_expand_sse_fabs (rtx op0, rtx *smask)
32292 enum machine_mode vmode, mode = GET_MODE (op0);
32293 rtx xa, mask;
32295 xa = gen_reg_rtx (mode);
32296 if (mode == SFmode)
32297 vmode = V4SFmode;
32298 else if (mode == DFmode)
32299 vmode = V2DFmode;
32300 else
32301 vmode = mode;
32302 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
32303 if (!VECTOR_MODE_P (mode))
32305 /* We need to generate a scalar mode mask in this case. */
32306 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
32307 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
32308 mask = gen_reg_rtx (mode);
32309 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
32311 emit_insn (gen_rtx_SET (VOIDmode, xa,
32312 gen_rtx_AND (mode, op0, mask)));
32314 if (smask)
32315 *smask = mask;
32317 return xa;
32320 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
32321 swapping the operands if SWAP_OPERANDS is true. The expanded
32322 code is a forward jump to a newly created label in case the
32323 comparison is true. The generated label rtx is returned. */
32324 static rtx
32325 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
32326 bool swap_operands)
32328 rtx label, tmp;
32330 if (swap_operands)
32332 tmp = op0;
32333 op0 = op1;
32334 op1 = tmp;
32337 label = gen_label_rtx ();
32338 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
32339 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32340 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
32341 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
32342 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
32343 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
32344 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32345 JUMP_LABEL (tmp) = label;
32347 return label;
32350 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
32351 using comparison code CODE. Operands are swapped for the comparison if
32352 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
32353 static rtx
32354 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
32355 bool swap_operands)
32357 rtx (*insn)(rtx, rtx, rtx, rtx);
32358 enum machine_mode mode = GET_MODE (op0);
32359 rtx mask = gen_reg_rtx (mode);
32361 if (swap_operands)
32363 rtx tmp = op0;
32364 op0 = op1;
32365 op1 = tmp;
32368 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
32370 emit_insn (insn (mask, op0, op1,
32371 gen_rtx_fmt_ee (code, mode, op0, op1)));
32372 return mask;
32375 /* Generate and return a rtx of mode MODE for 2**n where n is the number
32376 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
32377 static rtx
32378 ix86_gen_TWO52 (enum machine_mode mode)
32380 REAL_VALUE_TYPE TWO52r;
32381 rtx TWO52;
32383 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
32384 TWO52 = const_double_from_real_value (TWO52r, mode);
32385 TWO52 = force_reg (mode, TWO52);
32387 return TWO52;
32390 /* Expand SSE sequence for computing lround from OP1 storing
32391 into OP0. */
32392 void
32393 ix86_expand_lround (rtx op0, rtx op1)
32395 /* C code for the stuff we're doing below:
32396 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
32397 return (long)tmp;
32399 enum machine_mode mode = GET_MODE (op1);
32400 const struct real_format *fmt;
32401 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32402 rtx adj;
32404 /* load nextafter (0.5, 0.0) */
32405 fmt = REAL_MODE_FORMAT (mode);
32406 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32407 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
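  /* pred_half is the largest value strictly below 0.5.  Adding it (with
     op1's sign) rounds halfway cases away from zero, and unlike adding an
     exact 0.5 it cannot push a value just below one half (such as the
     double immediately preceding 0.5) up to the next integer.  */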
32409 /* adj = copysign (0.5, op1) */
32410 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
32411 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
32413 /* adj = op1 + adj */
32414 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
32416 /* op0 = (imode)adj */
32417 expand_fix (op0, adj, 0);
32420 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
32421 into OPERAND0. */
32422 void
32423 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
32425 /* C code for the stuff we're doing below (for do_floor):
32426 xi = (long)op1;
32427 xi -= (double)xi > op1 ? 1 : 0;
32428 return xi;
32430 enum machine_mode fmode = GET_MODE (op1);
32431 enum machine_mode imode = GET_MODE (op0);
32432 rtx ireg, freg, label, tmp;
32434 /* reg = (long)op1 */
32435 ireg = gen_reg_rtx (imode);
32436 expand_fix (ireg, op1, 0);
32438 /* freg = (double)reg */
32439 freg = gen_reg_rtx (fmode);
32440 expand_float (freg, ireg, 0);
32442 /* ireg = (freg > op1) ? ireg - 1 : ireg */
32443 label = ix86_expand_sse_compare_and_jump (UNLE,
32444 freg, op1, !do_floor);
32445 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
32446 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
32447 emit_move_insn (ireg, tmp);
32449 emit_label (label);
32450 LABEL_NUSES (label) = 1;
32452 emit_move_insn (op0, ireg);
32455 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
32456 result in OPERAND0. */
32457 void
32458 ix86_expand_rint (rtx operand0, rtx operand1)
32460 /* C code for the stuff we're doing below:
32461 xa = fabs (operand1);
32462 if (!isless (xa, 2**52))
32463 return operand1;
32464 xa = xa + 2**52 - 2**52;
32465 return copysign (xa, operand1);
32467 enum machine_mode mode = GET_MODE (operand0);
32468 rtx res, xa, label, TWO52, mask;
32470 res = gen_reg_rtx (mode);
32471 emit_move_insn (res, operand1);
32473 /* xa = abs (operand1) */
32474 xa = ix86_expand_sse_fabs (res, &mask);
32476 /* if (!isless (xa, TWO52)) goto label; */
32477 TWO52 = ix86_gen_TWO52 (mode);
32478 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
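  /* Adding TWO52 (2**p, p = number of mantissa bits) to a value already
     known to be smaller in magnitude leaves no bits for a fraction, so the
     addition itself rounds to an integer in the current rounding mode;
     subtracting TWO52 then recovers that integer, which is exactly the
     rint semantics.  */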
32480 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32481 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32483 ix86_sse_copysign_to_positive (res, xa, res, mask);
32485 emit_label (label);
32486 LABEL_NUSES (label) = 1;
32488 emit_move_insn (operand0, res);
32491 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
32492 into OPERAND0. */
32493 void
32494 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
32496 /* C code for the stuff we expand below.
32497 double xa = fabs (x), x2;
32498 if (!isless (xa, TWO52))
32499 return x;
32500 xa = xa + TWO52 - TWO52;
32501 x2 = copysign (xa, x);
32502 Compensate. Floor:
32503 if (x2 > x)
32504 x2 -= 1;
32505 Compensate. Ceil:
32506 if (x2 < x)
32507 x2 -= -1;
32508 return x2;
32510 enum machine_mode mode = GET_MODE (operand0);
32511 rtx xa, TWO52, tmp, label, one, res, mask;
32513 TWO52 = ix86_gen_TWO52 (mode);
32515 /* Temporary for holding the result, initialized to the input
32516 operand to ease control flow. */
32517 res = gen_reg_rtx (mode);
32518 emit_move_insn (res, operand1);
32520 /* xa = abs (operand1) */
32521 xa = ix86_expand_sse_fabs (res, &mask);
32523 /* if (!isless (xa, TWO52)) goto label; */
32524 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32526 /* xa = xa + TWO52 - TWO52; */
32527 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32528 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32530 /* xa = copysign (xa, operand1) */
32531 ix86_sse_copysign_to_positive (xa, xa, res, mask);
32533 /* generate 1.0 or -1.0 */
32534 one = force_reg (mode,
32535 const_double_from_real_value (do_floor
32536 ? dconst1 : dconstm1, mode));
32538 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32539 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32540 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32541 gen_rtx_AND (mode, one, tmp)));
32542 /* We always need to subtract here to preserve signed zero. */
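  /* Under the default rounding mode (-0.0) + (+0.0) is +0.0 while
     (-0.0) - (+0.0) stays -0.0, so when the compensation mask is zero
     a MINUS keeps the sign of a negative zero input intact.  */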
32543 tmp = expand_simple_binop (mode, MINUS,
32544 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32545 emit_move_insn (res, tmp);
32547 emit_label (label);
32548 LABEL_NUSES (label) = 1;
32550 emit_move_insn (operand0, res);
32553 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
32554 into OPERAND0. */
32555 void
32556 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
32558 /* C code for the stuff we expand below.
32559 double xa = fabs (x), x2;
32560 if (!isless (xa, TWO52))
32561 return x;
32562 x2 = (double)(long)x;
32563 Compensate. Floor:
32564 if (x2 > x)
32565 x2 -= 1;
32566 Compensate. Ceil:
32567 if (x2 < x)
32568 x2 += 1;
32569 if (HONOR_SIGNED_ZEROS (mode))
32570 return copysign (x2, x);
32571 return x2;
32573 enum machine_mode mode = GET_MODE (operand0);
32574 rtx xa, xi, TWO52, tmp, label, one, res, mask;
32576 TWO52 = ix86_gen_TWO52 (mode);
32578 /* Temporary for holding the result, initialized to the input
32579 operand to ease control flow. */
32580 res = gen_reg_rtx (mode);
32581 emit_move_insn (res, operand1);
32583 /* xa = abs (operand1) */
32584 xa = ix86_expand_sse_fabs (res, &mask);
32586 /* if (!isless (xa, TWO52)) goto label; */
32587 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32589 /* xa = (double)(long)x */
32590 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32591 expand_fix (xi, res, 0);
32592 expand_float (xa, xi, 0);
32594 /* generate 1.0 */
32595 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32597 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32598 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32599 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32600 gen_rtx_AND (mode, one, tmp)));
32601 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
32602 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32603 emit_move_insn (res, tmp);
32605 if (HONOR_SIGNED_ZEROS (mode))
32606 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32608 emit_label (label);
32609 LABEL_NUSES (label) = 1;
32611 emit_move_insn (operand0, res);
32614 /* Expand SSE sequence for computing round from OPERAND1 storing
32615 into OPERAND0. Sequence that works without relying on DImode truncation
32616    via cvttsd2siq, which is only available on 64-bit targets.  */
32617 void
32618 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
32620 /* C code for the stuff we expand below.
32621 double xa = fabs (x), xa2, x2;
32622 if (!isless (xa, TWO52))
32623 return x;
32624 Using the absolute value and copying back sign makes
32625 -0.0 -> -0.0 correct.
32626 xa2 = xa + TWO52 - TWO52;
32627 Compensate.
32628 dxa = xa2 - xa;
32629 if (dxa <= -0.5)
32630 xa2 += 1;
32631 else if (dxa > 0.5)
32632 xa2 -= 1;
32633 x2 = copysign (xa2, x);
32634 return x2;
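  /* The TWO52 trick rounds xa in the hardware rounding mode (nearest-even
     by default), whereas round() must round halfway cases away from zero;
     dxa records how the hardware rounded so that the compensations below
     can correct any off-by-one.  */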
32636 enum machine_mode mode = GET_MODE (operand0);
32637 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
32639 TWO52 = ix86_gen_TWO52 (mode);
32641 /* Temporary for holding the result, initialized to the input
32642 operand to ease control flow. */
32643 res = gen_reg_rtx (mode);
32644 emit_move_insn (res, operand1);
32646 /* xa = abs (operand1) */
32647 xa = ix86_expand_sse_fabs (res, &mask);
32649 /* if (!isless (xa, TWO52)) goto label; */
32650 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32652 /* xa2 = xa + TWO52 - TWO52; */
32653 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32654 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
32656 /* dxa = xa2 - xa; */
32657 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
32659 /* generate 0.5, 1.0 and -0.5 */
32660 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
32661 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
32662 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
32663 0, OPTAB_DIRECT);
32665 /* Compensate. */
32666 tmp = gen_reg_rtx (mode);
32667 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
32668 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
32669 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32670 gen_rtx_AND (mode, one, tmp)));
32671 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32672 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
32673 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
32674 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32675 gen_rtx_AND (mode, one, tmp)));
32676 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32678 /* res = copysign (xa2, operand1) */
32679 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
32681 emit_label (label);
32682 LABEL_NUSES (label) = 1;
32684 emit_move_insn (operand0, res);
32687 /* Expand SSE sequence for computing trunc from OPERAND1 storing
32688 into OPERAND0. */
32689 void
32690 ix86_expand_trunc (rtx operand0, rtx operand1)
32692 /* C code for SSE variant we expand below.
32693 double xa = fabs (x), x2;
32694 if (!isless (xa, TWO52))
32695 return x;
32696 x2 = (double)(long)x;
32697 if (HONOR_SIGNED_ZEROS (mode))
32698 return copysign (x2, x);
32699 return x2;
32701 enum machine_mode mode = GET_MODE (operand0);
32702 rtx xa, xi, TWO52, label, res, mask;
32704 TWO52 = ix86_gen_TWO52 (mode);
32706 /* Temporary for holding the result, initialized to the input
32707 operand to ease control flow. */
32708 res = gen_reg_rtx (mode);
32709 emit_move_insn (res, operand1);
32711 /* xa = abs (operand1) */
32712 xa = ix86_expand_sse_fabs (res, &mask);
32714 /* if (!isless (xa, TWO52)) goto label; */
32715 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32717 /* x = (double)(long)x */
32718 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32719 expand_fix (xi, res, 0);
32720 expand_float (res, xi, 0);
32722 if (HONOR_SIGNED_ZEROS (mode))
32723 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32725 emit_label (label);
32726 LABEL_NUSES (label) = 1;
32728 emit_move_insn (operand0, res);
32731 /* Expand SSE sequence for computing trunc from OPERAND1 storing
32732 into OPERAND0. */
32733 void
32734 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
32736 enum machine_mode mode = GET_MODE (operand0);
32737 rtx xa, mask, TWO52, label, one, res, smask, tmp;
32739 /* C code for SSE variant we expand below.
32740 double xa = fabs (x), x2;
32741 if (!isless (xa, TWO52))
32742 return x;
32743 xa2 = xa + TWO52 - TWO52;
32744 Compensate:
32745 if (xa2 > xa)
32746 xa2 -= 1.0;
32747 x2 = copysign (xa2, x);
32748 return x2;
32751 TWO52 = ix86_gen_TWO52 (mode);
32753 /* Temporary for holding the result, initialized to the input
32754 operand to ease control flow. */
32755 res = gen_reg_rtx (mode);
32756 emit_move_insn (res, operand1);
32758 /* xa = abs (operand1) */
32759 xa = ix86_expand_sse_fabs (res, &smask);
32761 /* if (!isless (xa, TWO52)) goto label; */
32762 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32764 /* res = xa + TWO52 - TWO52; */
32765 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32766 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
32767 emit_move_insn (res, tmp);
32769 /* generate 1.0 */
32770 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32772 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
32773 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
32774 emit_insn (gen_rtx_SET (VOIDmode, mask,
32775 gen_rtx_AND (mode, mask, one)));
32776 tmp = expand_simple_binop (mode, MINUS,
32777 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
32778 emit_move_insn (res, tmp);
32780 /* res = copysign (res, operand1) */
32781 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
32783 emit_label (label);
32784 LABEL_NUSES (label) = 1;
32786 emit_move_insn (operand0, res);
32789 /* Expand SSE sequence for computing round from OPERAND1 storing
32790 into OPERAND0. */
32791 void
32792 ix86_expand_round (rtx operand0, rtx operand1)
32794 /* C code for the stuff we're doing below:
32795 double xa = fabs (x);
32796 if (!isless (xa, TWO52))
32797 return x;
32798 xa = (double)(long)(xa + nextafter (0.5, 0.0));
32799 return copysign (xa, x);
32801 enum machine_mode mode = GET_MODE (operand0);
32802 rtx res, TWO52, xa, label, xi, half, mask;
32803 const struct real_format *fmt;
32804 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32806 /* Temporary for holding the result, initialized to the input
32807 operand to ease control flow. */
32808 res = gen_reg_rtx (mode);
32809 emit_move_insn (res, operand1);
32811 TWO52 = ix86_gen_TWO52 (mode);
32812 xa = ix86_expand_sse_fabs (res, &mask);
32813 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32815 /* load nextafter (0.5, 0.0) */
32816 fmt = REAL_MODE_FORMAT (mode);
32817 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32818 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32820 /* xa = xa + 0.5 */
32821 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
32822 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
32824 /* xa = (double)(int64_t)xa */
32825 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32826 expand_fix (xi, xa, 0);
32827 expand_float (xa, xi, 0);
32829 /* res = copysign (xa, operand1) */
32830 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
32832 emit_label (label);
32833 LABEL_NUSES (label) = 1;
32835 emit_move_insn (operand0, res);
32839 /* Table of valid machine attributes. */
32840 static const struct attribute_spec ix86_attribute_table[] =
32842 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
32843 affects_type_identity } */
32844 /* Stdcall attribute says callee is responsible for popping arguments
32845 if they are not variable. */
32846 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32847 true },
32848 /* Fastcall attribute says callee is responsible for popping arguments
32849 if they are not variable. */
32850 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32851 true },
32852 /* Thiscall attribute says callee is responsible for popping arguments
32853 if they are not variable. */
32854 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32855 true },
32856 /* Cdecl attribute says the callee is a normal C declaration */
32857 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32858 true },
32859 /* Regparm attribute specifies how many integer arguments are to be
32860 passed in registers. */
32861 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
32862 true },
32863 /* Sseregparm attribute says we are using x86_64 calling conventions
32864 for FP arguments. */
32865 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32866 true },
32867 /* force_align_arg_pointer says this function realigns the stack at entry. */
32868 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
32869 false, true, true, ix86_handle_cconv_attribute, false },
32870 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32871 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
32872 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
32873 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
32874 false },
32875 #endif
32876 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32877 false },
32878 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32879 false },
32880 #ifdef SUBTARGET_ATTRIBUTE_TABLE
32881 SUBTARGET_ATTRIBUTE_TABLE,
32882 #endif
32883 /* ms_abi and sysv_abi calling convention function attributes. */
32884 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32885 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32886 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
32887 false },
32888 { "callee_pop_aggregate_return", 1, 1, false, true, true,
32889 ix86_handle_callee_pop_aggregate_return, true },
32890 /* End element. */
32891 { NULL, 0, 0, false, false, false, NULL, false }
32894 /* Implement targetm.vectorize.builtin_vectorization_cost. */
32895 static int
32896 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
32897 tree vectype ATTRIBUTE_UNUSED,
32898 int misalign ATTRIBUTE_UNUSED)
32900 switch (type_of_cost)
32902 case scalar_stmt:
32903 return ix86_cost->scalar_stmt_cost;
32905 case scalar_load:
32906 return ix86_cost->scalar_load_cost;
32908 case scalar_store:
32909 return ix86_cost->scalar_store_cost;
32911 case vector_stmt:
32912 return ix86_cost->vec_stmt_cost;
32914 case vector_load:
32915 return ix86_cost->vec_align_load_cost;
32917 case vector_store:
32918 return ix86_cost->vec_store_cost;
32920 case vec_to_scalar:
32921 return ix86_cost->vec_to_scalar_cost;
32923 case scalar_to_vec:
32924 return ix86_cost->scalar_to_vec_cost;
32926 case unaligned_load:
32927 case unaligned_store:
32928 return ix86_cost->vec_unalign_load_cost;
32930 case cond_branch_taken:
32931 return ix86_cost->cond_taken_branch_cost;
32933 case cond_branch_not_taken:
32934 return ix86_cost->cond_not_taken_branch_cost;
32936 case vec_perm:
32937 return 1;
32939 default:
32940 gcc_unreachable ();
32945 /* Implement targetm.vectorize.builtin_vec_perm. */
32947 static tree
32948 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
32950 tree itype = TREE_TYPE (vec_type);
32951 bool u = TYPE_UNSIGNED (itype);
32952 enum machine_mode vmode = TYPE_MODE (vec_type);
32953 enum ix86_builtins fcode;
32954 bool ok = TARGET_SSE2;
32956 switch (vmode)
32958 case V4DFmode:
32959 ok = TARGET_AVX;
32960 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
32961 goto get_di;
32962 case V2DFmode:
32963 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
32964 get_di:
32965 itype = ix86_get_builtin_type (IX86_BT_DI);
32966 break;
32968 case V8SFmode:
32969 ok = TARGET_AVX;
32970 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
32971 goto get_si;
32972 case V4SFmode:
32973 ok = TARGET_SSE;
32974 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
32975 get_si:
32976 itype = ix86_get_builtin_type (IX86_BT_SI);
32977 break;
32979 case V2DImode:
32980 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
32981 break;
32982 case V4SImode:
32983 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
32984 break;
32985 case V8HImode:
32986 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
32987 break;
32988 case V16QImode:
32989 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
32990 break;
32991 default:
32992 ok = false;
32993 break;
32996 if (!ok)
32997 return NULL_TREE;
32999 *mask_type = itype;
33000 return ix86_builtins[(int) fcode];
33003 /* Return a vector mode with twice as many elements as VMODE. */
33004 /* ??? Consider moving this to a table generated by genmodes.c. */
33006 static enum machine_mode
33007 doublesize_vector_mode (enum machine_mode vmode)
33009 switch (vmode)
33011 case V2SFmode: return V4SFmode;
33012 case V1DImode: return V2DImode;
33013 case V2SImode: return V4SImode;
33014 case V4HImode: return V8HImode;
33015 case V8QImode: return V16QImode;
33017 case V2DFmode: return V4DFmode;
33018 case V4SFmode: return V8SFmode;
33019 case V2DImode: return V4DImode;
33020 case V4SImode: return V8SImode;
33021 case V8HImode: return V16HImode;
33022 case V16QImode: return V32QImode;
33024 case V4DFmode: return V8DFmode;
33025 case V8SFmode: return V16SFmode;
33026 case V4DImode: return V8DImode;
33027 case V8SImode: return V16SImode;
33028 case V16HImode: return V32HImode;
33029 case V32QImode: return V64QImode;
33031 default:
33032 gcc_unreachable ();
33036 /* Construct (set target (vec_select op0 (parallel perm))) and
33037 return true if that's a valid instruction in the active ISA. */
33039 static bool
33040 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
33042 rtx rperm[MAX_VECT_LEN], x;
33043 unsigned i;
33045 for (i = 0; i < nelt; ++i)
33046 rperm[i] = GEN_INT (perm[i]);
33048 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
33049 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
33050 x = gen_rtx_SET (VOIDmode, target, x);
33052 x = emit_insn (x);
33053 if (recog_memoized (x) < 0)
33055 remove_insn (x);
33056 return false;
33058 return true;
33061 /* Similar, but generate a vec_concat from op0 and op1 as well. */
33063 static bool
33064 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
33065 const unsigned char *perm, unsigned nelt)
33067 enum machine_mode v2mode;
33068 rtx x;
33070 v2mode = doublesize_vector_mode (GET_MODE (op0));
33071 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
33072 return expand_vselect (target, x, perm, nelt);
33075 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33076 in terms of blendp[sd] / pblendw / pblendvb. */
33078 static bool
33079 expand_vec_perm_blend (struct expand_vec_perm_d *d)
33081 enum machine_mode vmode = d->vmode;
33082 unsigned i, mask, nelt = d->nelt;
33083 rtx target, op0, op1, x;
33085 if (!TARGET_SSE4_1 || d->op0 == d->op1)
33086 return false;
33087 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
33088 return false;
33090 /* This is a blend, not a permute. Elements must stay in their
33091 respective lanes. */
33092 for (i = 0; i < nelt; ++i)
33094 unsigned e = d->perm[i];
33095 if (!(e == i || e == i + nelt))
33096 return false;
33099 if (d->testing_p)
33100 return true;
33102 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
33103 decision should be extracted elsewhere, so that we only try that
33104 sequence once all budget==3 options have been tried. */
33106 /* For bytes, see if bytes move in pairs so we can use pblendw with
33107 an immediate argument, rather than pblendvb with a vector argument. */
33108 if (vmode == V16QImode)
33110 bool pblendw_ok = true;
33111 for (i = 0; i < 16 && pblendw_ok; i += 2)
33112 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
33114 if (!pblendw_ok)
33116 rtx rperm[16], vperm;
33118 for (i = 0; i < nelt; ++i)
33119 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
33121 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
33122 vperm = force_reg (V16QImode, vperm);
33124 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
33125 return true;
33129 target = d->target;
33130 op0 = d->op0;
33131 op1 = d->op1;
33132 mask = 0;
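  /* Build the immediate blend mask.  For the FP modes and V8HImode each
     mask bit selects one element directly.  V2DImode and V4SImode are
     re-expressed as V8HImode and blended with pblendw, so each element
     contributes four (resp. two) mask bits.  V16QImode relies on the
     pairs check above: the even byte of each pair supplies the pblendw
     bit for its word.  */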
33134 switch (vmode)
33136 case V4DFmode:
33137 case V8SFmode:
33138 case V2DFmode:
33139 case V4SFmode:
33140 case V8HImode:
33141 for (i = 0; i < nelt; ++i)
33142 mask |= (d->perm[i] >= nelt) << i;
33143 break;
33145 case V2DImode:
33146 for (i = 0; i < 2; ++i)
33147 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
33148 goto do_subreg;
33150 case V4SImode:
33151 for (i = 0; i < 4; ++i)
33152 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
33153 goto do_subreg;
33155 case V16QImode:
33156 for (i = 0; i < 8; ++i)
33157 mask |= (d->perm[i * 2] >= 16) << i;
33159 do_subreg:
33160 vmode = V8HImode;
33161 target = gen_lowpart (vmode, target);
33162 op0 = gen_lowpart (vmode, op0);
33163 op1 = gen_lowpart (vmode, op1);
33164 break;
33166 default:
33167 gcc_unreachable ();
33170   /* This matches five different patterns for the different modes.  */
33171 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
33172 x = gen_rtx_SET (VOIDmode, target, x);
33173 emit_insn (x);
33175 return true;
33178 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33179 in terms of the variable form of vpermilps.
33181 Note that we will have already failed the immediate input vpermilps,
33182 which requires that the high and low part shuffle be identical; the
33183 variable form doesn't require that. */
33185 static bool
33186 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
33188 rtx rperm[8], vperm;
33189 unsigned i;
33191 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
33192 return false;
33194 /* We can only permute within the 128-bit lane. */
33195 for (i = 0; i < 8; ++i)
33197 unsigned e = d->perm[i];
33198 if (i < 4 ? e >= 4 : e < 4)
33199 return false;
33202 if (d->testing_p)
33203 return true;
33205 for (i = 0; i < 8; ++i)
33207 unsigned e = d->perm[i];
33209 /* Within each 128-bit lane, the elements of op0 are numbered
33210 from 0 and the elements of op1 are numbered from 4. */
33211 if (e >= 8 + 4)
33212 e -= 8;
33213 else if (e >= 4)
33214 e -= 4;
33216 rperm[i] = GEN_INT (e);
33219 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
33220 vperm = force_reg (V8SImode, vperm);
33221 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
33223 return true;
33226 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33227 in terms of pshufb or vpperm. */
33229 static bool
33230 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
33232 unsigned i, nelt, eltsz;
33233 rtx rperm[16], vperm, target, op0, op1;
33235 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
33236 return false;
33237 if (GET_MODE_SIZE (d->vmode) != 16)
33238 return false;
33240 if (d->testing_p)
33241 return true;
33243 nelt = d->nelt;
33244 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33246 for (i = 0; i < nelt; ++i)
33248 unsigned j, e = d->perm[i];
33249 for (j = 0; j < eltsz; ++j)
33250 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
33253 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
33254 vperm = force_reg (V16QImode, vperm);
33256 target = gen_lowpart (V16QImode, d->target);
33257 op0 = gen_lowpart (V16QImode, d->op0);
33258 if (d->op0 == d->op1)
33259 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
33260 else
33262 op1 = gen_lowpart (V16QImode, d->op1);
33263 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
33266 return true;
33269 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
33270 in a single instruction. */
33272 static bool
33273 expand_vec_perm_1 (struct expand_vec_perm_d *d)
33275 unsigned i, nelt = d->nelt;
33276 unsigned char perm2[MAX_VECT_LEN];
33278 /* Check plain VEC_SELECT first, because AVX has instructions that could
33279 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
33280 input where SEL+CONCAT may not. */
33281 if (d->op0 == d->op1)
33283 int mask = nelt - 1;
33285 for (i = 0; i < nelt; i++)
33286 perm2[i] = d->perm[i] & mask;
33288 if (expand_vselect (d->target, d->op0, perm2, nelt))
33289 return true;
33291 /* There are plenty of patterns in sse.md that are written for
33292 SEL+CONCAT and are not replicated for a single op. Perhaps
33293 that should be changed, to avoid the nastiness here. */
33295 /* Recognize interleave style patterns, which means incrementing
33296 every other permutation operand. */
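  /* E.g. for V4SFmode a single-operand selector { 0, 0, 1, 1 } becomes
     { 0, 4, 1, 5 } on the op0||op0 concatenation, which is exactly
     punpcklps.  */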
33297 for (i = 0; i < nelt; i += 2)
33299 perm2[i] = d->perm[i] & mask;
33300 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
33302 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
33303 return true;
33305 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
33306 if (nelt >= 4)
33308 for (i = 0; i < nelt; i += 4)
33310 perm2[i + 0] = d->perm[i + 0] & mask;
33311 perm2[i + 1] = d->perm[i + 1] & mask;
33312 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
33313 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
33316 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
33317 return true;
33321 /* Finally, try the fully general two operand permute. */
33322 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
33323 return true;
33325 /* Recognize interleave style patterns with reversed operands. */
33326 if (d->op0 != d->op1)
33328 for (i = 0; i < nelt; ++i)
33330 unsigned e = d->perm[i];
33331 if (e >= nelt)
33332 e -= nelt;
33333 else
33334 e += nelt;
33335 perm2[i] = e;
33338 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
33339 return true;
33342 /* Try the SSE4.1 blend variable merge instructions. */
33343 if (expand_vec_perm_blend (d))
33344 return true;
33346 /* Try one of the AVX vpermil variable permutations. */
33347 if (expand_vec_perm_vpermil (d))
33348 return true;
33350 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
33351 if (expand_vec_perm_pshufb (d))
33352 return true;
33354 return false;
33357 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33358 in terms of a pair of pshuflw + pshufhw instructions. */
33360 static bool
33361 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
33363 unsigned char perm2[MAX_VECT_LEN];
33364 unsigned i;
33365 bool ok;
33367 if (d->vmode != V8HImode || d->op0 != d->op1)
33368 return false;
33370 /* The two permutations only operate in 64-bit lanes. */
33371 for (i = 0; i < 4; ++i)
33372 if (d->perm[i] >= 4)
33373 return false;
33374 for (i = 4; i < 8; ++i)
33375 if (d->perm[i] < 4)
33376 return false;
33378 if (d->testing_p)
33379 return true;
33381 /* Emit the pshuflw. */
33382 memcpy (perm2, d->perm, 4);
33383 for (i = 4; i < 8; ++i)
33384 perm2[i] = i;
33385 ok = expand_vselect (d->target, d->op0, perm2, 8);
33386 gcc_assert (ok);
33388 /* Emit the pshufhw. */
33389 memcpy (perm2 + 4, d->perm + 4, 4);
33390 for (i = 0; i < 4; ++i)
33391 perm2[i] = i;
33392 ok = expand_vselect (d->target, d->target, perm2, 8);
33393 gcc_assert (ok);
33395 return true;
33398 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33399 the permutation using the SSSE3 palignr instruction. This succeeds
33400 when all of the elements in PERM fit within one vector and we merely
33401 need to shift them down so that a single vector permutation has a
33402 chance to succeed. */
33404 static bool
33405 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
33407 unsigned i, nelt = d->nelt;
33408 unsigned min, max;
33409 bool in_order, ok;
33410 rtx shift;
33412 /* Even with AVX, palignr only operates on 128-bit vectors. */
33413 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33414 return false;
33416 min = nelt, max = 0;
33417 for (i = 0; i < nelt; ++i)
33419 unsigned e = d->perm[i];
33420 if (e < min)
33421 min = e;
33422 if (e > max)
33423 max = e;
33425 if (min == 0 || max - min >= nelt)
33426 return false;
33428 /* Given that we have SSSE3, we know we'll be able to implement the
33429 single operand permutation after the palignr with pshufb. */
33430 if (d->testing_p)
33431 return true;
33433 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
33434 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
33435 gen_lowpart (TImode, d->op1),
33436 gen_lowpart (TImode, d->op0), shift));
33438 d->op0 = d->op1 = d->target;
33440 in_order = true;
33441 for (i = 0; i < nelt; ++i)
33443 unsigned e = d->perm[i] - min;
33444 if (e != i)
33445 in_order = false;
33446 d->perm[i] = e;
33449 /* Test for the degenerate case where the alignment by itself
33450 produces the desired permutation. */
33451 if (in_order)
33452 return true;
33454 ok = expand_vec_perm_1 (d);
33455 gcc_assert (ok);
33457 return ok;
33460 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33461 a two vector permutation into a single vector permutation by using
33462 an interleave operation to merge the vectors. */
33464 static bool
33465 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
33467 struct expand_vec_perm_d dremap, dfinal;
33468 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
33469 unsigned contents, h1, h2, h3, h4;
33470 unsigned char remap[2 * MAX_VECT_LEN];
33471 rtx seq;
33472 bool ok;
33474 if (d->op0 == d->op1)
33475 return false;
33477 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
33478 lanes. We can use similar techniques with the vperm2f128 instruction,
33479 but it requires slightly different logic. */
33480 if (GET_MODE_SIZE (d->vmode) != 16)
33481 return false;
33483 /* Examine from whence the elements come. */
33484 contents = 0;
33485 for (i = 0; i < nelt; ++i)
33486 contents |= 1u << d->perm[i];
33488 /* Split the two input vectors into 4 halves. */
33489 h1 = (1u << nelt2) - 1;
33490 h2 = h1 << nelt2;
33491 h3 = h2 << nelt2;
33492 h4 = h3 << nelt2;
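  /* In CONTENTS, bits 0..nelt-1 stand for elements of op0 and bits
     nelt..2*nelt-1 for elements of op1; h1/h2 cover op0's low/high
     halves and h3/h4 cover op1's low/high halves.  */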
33494 memset (remap, 0xff, sizeof (remap));
33495 dremap = *d;
33497   /* If the elements are all from the low halves, use interleave low; similarly
33498      for interleave high.  If the elements are from mismatched halves, we
33499      can use shufps for V4SF/V4SI or do a DImode shuffle.  */
33500 if ((contents & (h1 | h3)) == contents)
33502 for (i = 0; i < nelt2; ++i)
33504 remap[i] = i * 2;
33505 remap[i + nelt] = i * 2 + 1;
33506 dremap.perm[i * 2] = i;
33507 dremap.perm[i * 2 + 1] = i + nelt;
33510 else if ((contents & (h2 | h4)) == contents)
33512 for (i = 0; i < nelt2; ++i)
33514 remap[i + nelt2] = i * 2;
33515 remap[i + nelt + nelt2] = i * 2 + 1;
33516 dremap.perm[i * 2] = i + nelt2;
33517 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
33520 else if ((contents & (h1 | h4)) == contents)
33522 for (i = 0; i < nelt2; ++i)
33524 remap[i] = i;
33525 remap[i + nelt + nelt2] = i + nelt2;
33526 dremap.perm[i] = i;
33527 dremap.perm[i + nelt2] = i + nelt + nelt2;
33529 if (nelt != 4)
33531 dremap.vmode = V2DImode;
33532 dremap.nelt = 2;
33533 dremap.perm[0] = 0;
33534 dremap.perm[1] = 3;
33537 else if ((contents & (h2 | h3)) == contents)
33539 for (i = 0; i < nelt2; ++i)
33541 remap[i + nelt2] = i;
33542 remap[i + nelt] = i + nelt2;
33543 dremap.perm[i] = i + nelt2;
33544 dremap.perm[i + nelt2] = i + nelt;
33546 if (nelt != 4)
33548 dremap.vmode = V2DImode;
33549 dremap.nelt = 2;
33550 dremap.perm[0] = 1;
33551 dremap.perm[1] = 2;
33554 else
33555 return false;
33557 /* Use the remapping array set up above to move the elements from their
33558 swizzled locations into their final destinations. */
33559 dfinal = *d;
33560 for (i = 0; i < nelt; ++i)
33562 unsigned e = remap[d->perm[i]];
33563 gcc_assert (e < nelt);
33564 dfinal.perm[i] = e;
33566 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
33567 dfinal.op1 = dfinal.op0;
33568 dremap.target = dfinal.op0;
33570 /* Test if the final remap can be done with a single insn. For V4SFmode or
33571 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
33572 start_sequence ();
33573 ok = expand_vec_perm_1 (&dfinal);
33574 seq = get_insns ();
33575 end_sequence ();
33577 if (!ok)
33578 return false;
33580 if (dremap.vmode != dfinal.vmode)
33582 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
33583 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
33584 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
33587 ok = expand_vec_perm_1 (&dremap);
33588 gcc_assert (ok);
33590 emit_insn (seq);
33591 return true;
33594 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
33595 permutation with two pshufb insns and an ior. We should have already
33596 failed all two instruction sequences. */
33598 static bool
33599 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
33601 rtx rperm[2][16], vperm, l, h, op, m128;
33602 unsigned int i, nelt, eltsz;
33604 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33605 return false;
33606 gcc_assert (d->op0 != d->op1);
33608 nelt = d->nelt;
33609 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33611 /* Generate two permutation masks. If the required element is within
33612 the given vector it is shuffled into the proper lane. If the required
33613 element is in the other vector, force a zero into the lane by setting
33614 bit 7 in the permutation mask. */
33615 m128 = GEN_INT (-128);
33616 for (i = 0; i < nelt; ++i)
33618 unsigned j, e = d->perm[i];
33619 unsigned which = (e >= nelt);
33620 if (e >= nelt)
33621 e -= nelt;
33623 for (j = 0; j < eltsz; ++j)
33625 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
33626 rperm[1-which][i*eltsz + j] = m128;
33630 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
33631 vperm = force_reg (V16QImode, vperm);
33633 l = gen_reg_rtx (V16QImode);
33634 op = gen_lowpart (V16QImode, d->op0);
33635 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
33637 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
33638 vperm = force_reg (V16QImode, vperm);
33640 h = gen_reg_rtx (V16QImode);
33641 op = gen_lowpart (V16QImode, d->op1);
33642 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
33644 op = gen_lowpart (V16QImode, d->target);
33645 emit_insn (gen_iorv16qi3 (op, l, h));
33647 return true;
33650 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
33651 and extract-odd permutations. */
33653 static bool
33654 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
33656 rtx t1, t2, t3;
33658 switch (d->vmode)
33660 case V4DFmode:
33661 t1 = gen_reg_rtx (V4DFmode);
33662 t2 = gen_reg_rtx (V4DFmode);
33664 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
33665 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
33666 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
33668 /* Now an unpck[lh]pd will produce the result required. */
33669 if (odd)
33670 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
33671 else
33672 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
33673 emit_insn (t3);
33674 break;
33676 case V8SFmode:
33678 int mask = odd ? 0xdd : 0x88;
33680 t1 = gen_reg_rtx (V8SFmode);
33681 t2 = gen_reg_rtx (V8SFmode);
33682 t3 = gen_reg_rtx (V8SFmode);
33684 /* Shuffle within the 128-bit lanes to produce:
33685 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
33686 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
33687 GEN_INT (mask)));
33689 /* Shuffle the lanes around to produce:
33690 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
33691 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
33692 GEN_INT (0x3)));
33694 /* Shuffle within the 128-bit lanes to produce:
33695 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
33696 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
33698 /* Shuffle within the 128-bit lanes to produce:
33699 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
33700 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
33702 /* Shuffle the lanes around to produce:
33703 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
33704 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
33705 GEN_INT (0x20)));
33707 break;
33709 case V2DFmode:
33710 case V4SFmode:
33711 case V2DImode:
33712 case V4SImode:
33713 /* These are always directly implementable by expand_vec_perm_1. */
33714 gcc_unreachable ();
33716 case V8HImode:
33717 if (TARGET_SSSE3)
33718 return expand_vec_perm_pshufb2 (d);
33719 else
33721 /* We need 2*log2(N)-1 operations to achieve odd/even
33722 with interleave. */
33723 t1 = gen_reg_rtx (V8HImode);
33724 t2 = gen_reg_rtx (V8HImode);
33725 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
33726 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
33727 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
33728 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
33729 if (odd)
33730 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
33731 else
33732 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
33733 emit_insn (t3);
33735 break;
33737 case V16QImode:
33738 if (TARGET_SSSE3)
33739 return expand_vec_perm_pshufb2 (d);
33740 else
33742 t1 = gen_reg_rtx (V16QImode);
33743 t2 = gen_reg_rtx (V16QImode);
33744 t3 = gen_reg_rtx (V16QImode);
33745 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
33746 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
33747 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
33748 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
33749 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
33750 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
33751 if (odd)
33752 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
33753 else
33754 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
33755 emit_insn (t3);
33757 break;
33759 default:
33760 gcc_unreachable ();
33763 return true;
33766 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33767 extract-even and extract-odd permutations. */
33769 static bool
33770 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
33772 unsigned i, odd, nelt = d->nelt;
33774 odd = d->perm[0];
33775 if (odd != 0 && odd != 1)
33776 return false;
33778 for (i = 1; i < nelt; ++i)
33779 if (d->perm[i] != 2 * i + odd)
33780 return false;
33782 return expand_vec_perm_even_odd_1 (d, odd);
33785 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
33786 permutations. We assume that expand_vec_perm_1 has already failed. */
33788 static bool
33789 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
33791 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
33792 enum machine_mode vmode = d->vmode;
33793 unsigned char perm2[4];
33794 rtx op0 = d->op0;
33795 bool ok;
33797 switch (vmode)
33799 case V4DFmode:
33800 case V8SFmode:
33801 /* These are special-cased in sse.md so that we can optionally
33802 use the vbroadcast instruction. They expand to two insns
33803 if the input happens to be in a register. */
33804 gcc_unreachable ();
33806 case V2DFmode:
33807 case V2DImode:
33808 case V4SFmode:
33809 case V4SImode:
33810 /* These are always implementable using standard shuffle patterns. */
33811 gcc_unreachable ();
33813 case V8HImode:
33814 case V16QImode:
33815 /* These can be implemented via interleave. We save one insn by
33816 	 stopping once we have promoted to V4SImode and then using pshufd.  */
33819 optab otab = vec_interleave_low_optab;
33821 if (elt >= nelt2)
33823 otab = vec_interleave_high_optab;
33824 elt -= nelt2;
33826 nelt2 /= 2;
33828 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
33829 vmode = get_mode_wider_vector (vmode);
33830 op0 = gen_lowpart (vmode, op0);
33832 while (vmode != V4SImode);
33834 memset (perm2, elt, 4);
33835 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
33836 gcc_assert (ok);
33837 return true;
33839 default:
33840 gcc_unreachable ();
33844 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33845 broadcast permutations. */
33847 static bool
33848 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
33850 unsigned i, elt, nelt = d->nelt;
33852 if (d->op0 != d->op1)
33853 return false;
33855 elt = d->perm[0];
33856 for (i = 1; i < nelt; ++i)
33857 if (d->perm[i] != elt)
33858 return false;
33860 return expand_vec_perm_broadcast_1 (d);
33863 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
33864 With all of the interface bits taken care of, perform the expansion
33865 in D and return true on success. */
33867 static bool
33868 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
33870 /* Try a single instruction expansion. */
33871 if (expand_vec_perm_1 (d))
33872 return true;
33874 /* Try sequences of two instructions. */
33876 if (expand_vec_perm_pshuflw_pshufhw (d))
33877 return true;
33879 if (expand_vec_perm_palignr (d))
33880 return true;
33882 if (expand_vec_perm_interleave2 (d))
33883 return true;
33885 if (expand_vec_perm_broadcast (d))
33886 return true;
33888 /* Try sequences of three instructions. */
33890 if (expand_vec_perm_pshufb2 (d))
33891 return true;
33893 /* ??? Look for narrow permutations whose element orderings would
33894 allow the promotion to a wider mode. */
33896 /* ??? Look for sequences of interleave or a wider permute that place
33897 the data into the correct lanes for a half-vector shuffle like
33898 pshuf[lh]w or vpermilps. */
33900 /* ??? Look for sequences of interleave that produce the desired results.
33901 The combinatorics of punpck[lh] get pretty ugly... */
33903 if (expand_vec_perm_even_odd (d))
33904 return true;
33906 return false;
33909 /* Extract the values from the vector CST into the permutation array in D.
33910 Return 0 on error, 1 if all values from the permutation come from the
33911 first vector, 2 if all values from the second vector, and 3 otherwise. */
33913 static int
33914 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
33916 tree list = TREE_VECTOR_CST_ELTS (cst);
33917 unsigned i, nelt = d->nelt;
33918 int ret = 0;
33920 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
33922 unsigned HOST_WIDE_INT e;
33924 if (!host_integerp (TREE_VALUE (list), 1))
33925 return 0;
33926 e = tree_low_cst (TREE_VALUE (list), 1);
33927 if (e >= 2 * nelt)
33928 return 0;
33930 ret |= (e < nelt ? 1 : 2);
33931 d->perm[i] = e;
33933 gcc_assert (list == NULL);
33935 /* For all elements from second vector, fold the elements to first. */
33936 if (ret == 2)
33937 for (i = 0; i < nelt; ++i)
33938 d->perm[i] -= nelt;
33940 return ret;
33943 static rtx
33944 ix86_expand_vec_perm_builtin (tree exp)
33946 struct expand_vec_perm_d d;
33947 tree arg0, arg1, arg2;
33949 arg0 = CALL_EXPR_ARG (exp, 0);
33950 arg1 = CALL_EXPR_ARG (exp, 1);
33951 arg2 = CALL_EXPR_ARG (exp, 2);
33953 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
33954 d.nelt = GET_MODE_NUNITS (d.vmode);
33955 d.testing_p = false;
33956 gcc_assert (VECTOR_MODE_P (d.vmode));
33958 if (TREE_CODE (arg2) != VECTOR_CST)
33960 error_at (EXPR_LOCATION (exp),
33961 "vector permutation requires vector constant");
33962 goto exit_error;
33965 switch (extract_vec_perm_cst (&d, arg2))
33967 default:
33968 gcc_unreachable();
33970 case 0:
33971 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
33972 goto exit_error;
33974 case 3:
33975 if (!operand_equal_p (arg0, arg1, 0))
33977 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33978 d.op0 = force_reg (d.vmode, d.op0);
33979 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33980 d.op1 = force_reg (d.vmode, d.op1);
33981 break;
33984 /* The elements of PERM do not suggest that only the first operand
33985 is used, but both operands are identical. Allow easier matching
33986 of the permutation by folding the permutation into the single
33987 input vector. */
33989 unsigned i, nelt = d.nelt;
33990 for (i = 0; i < nelt; ++i)
33991 if (d.perm[i] >= nelt)
33992 d.perm[i] -= nelt;
33994 /* FALLTHRU */
33996 case 1:
33997 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33998 d.op0 = force_reg (d.vmode, d.op0);
33999 d.op1 = d.op0;
34000 break;
34002 case 2:
34003 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
34004 d.op0 = force_reg (d.vmode, d.op0);
34005 d.op1 = d.op0;
34006 break;
34009 d.target = gen_reg_rtx (d.vmode);
34010 if (ix86_expand_vec_perm_builtin_1 (&d))
34011 return d.target;
34013   /* For compiler generated permutations, we should never get here, because
34014      the compiler should also be checking the ok hook.  But this is a
34015      builtin the user has access to, so don't abort.  */
34016 switch (d.nelt)
34018 case 2:
34019 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
34020 break;
34021 case 4:
34022 sorry ("vector permutation (%d %d %d %d)",
34023 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
34024 break;
34025 case 8:
34026 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
34027 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
34028 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
34029 break;
34030 case 16:
34031 sorry ("vector permutation "
34032 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
34033 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
34034 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
34035 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
34036 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
34037 break;
34038 default:
34039 gcc_unreachable ();
34041 exit_error:
34042 return CONST0_RTX (d.vmode);
34045 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
34047 static bool
34048 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
34050 struct expand_vec_perm_d d;
34051 int vec_mask;
34052 bool ret, one_vec;
34054 d.vmode = TYPE_MODE (vec_type);
34055 d.nelt = GET_MODE_NUNITS (d.vmode);
34056 d.testing_p = true;
34058 /* Given sufficient ISA support we can just return true here
34059 for selected vector modes. */
34060 if (GET_MODE_SIZE (d.vmode) == 16)
34062 /* All implementable with a single vpperm insn. */
34063 if (TARGET_XOP)
34064 return true;
34065 /* All implementable with 2 pshufb + 1 ior. */
34066 if (TARGET_SSSE3)
34067 return true;
34068 /* All implementable with shufpd or unpck[lh]pd. */
34069 if (d.nelt == 2)
34070 return true;
34073 vec_mask = extract_vec_perm_cst (&d, mask);
34075   /* This hook cannot be called in response to something that the
34076      user does (unlike the builtin expander), so we shouldn't ever see
34077 an error generated from the extract. */
34078 gcc_assert (vec_mask > 0 && vec_mask <= 3);
34079 one_vec = (vec_mask != 3);
34081 /* Implementable with shufps or pshufd. */
34082 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
34083 return true;
34085 /* Otherwise we have to go through the motions and see if we can
34086 figure out how to generate the requested permutation. */
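  /* d.testing_p is still set, so the expanders mostly just check
     feasibility; whatever they do emit while probing is captured in a
     throw-away sequence below.  */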
34087 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
34088 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
34089 if (!one_vec)
34090 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
34092 start_sequence ();
34093 ret = ix86_expand_vec_perm_builtin_1 (&d);
34094 end_sequence ();
34096 return ret;
34099 void
34100 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
34102 struct expand_vec_perm_d d;
34103 unsigned i, nelt;
34105 d.target = targ;
34106 d.op0 = op0;
34107 d.op1 = op1;
34108 d.vmode = GET_MODE (targ);
34109 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
34110 d.testing_p = false;
34112 for (i = 0; i < nelt; ++i)
34113 d.perm[i] = i * 2 + odd;
34115 /* We'll either be able to implement the permutation directly... */
34116 if (expand_vec_perm_1 (&d))
34117 return;
34119 /* ... or we use the special-case patterns. */
34120 expand_vec_perm_even_odd_1 (&d, odd);
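/* Editorial sketch, not part of this file: ix86_expand_vec_extract_even_odd
   above builds the selector d.perm[i] = i * 2 + odd, so for a 4-element
   destination ODD == 0 asks for elements {0, 2, 4, 6} of the op0:op1 pair
   and ODD == 1 asks for {1, 3, 5, 7}.  A standalone illustration: */

#include <stdio.h>

int
main (void)
{
  unsigned nelt = 4, odd, i;
  for (odd = 0; odd <= 1; ++odd)
    {
      printf ("odd=%u:", odd);
      for (i = 0; i < nelt; ++i)
        printf (" %u", i * 2 + odd);
      printf ("\n");
    }
  return 0;
}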
34123 /* Expand an insert into a vector register through pinsr insn.
34124 Return true if successful. */
34126 bool
34127 ix86_expand_pinsr (rtx *operands)
34129 rtx dst = operands[0];
34130 rtx src = operands[3];
34132 unsigned int size = INTVAL (operands[1]);
34133 unsigned int pos = INTVAL (operands[2]);
34135 if (GET_CODE (dst) == SUBREG)
34137 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
34138 dst = SUBREG_REG (dst);
34141 if (GET_CODE (src) == SUBREG)
34142 src = SUBREG_REG (src);
34144 switch (GET_MODE (dst))
34146 case V16QImode:
34147 case V8HImode:
34148 case V4SImode:
34149 case V2DImode:
34151 enum machine_mode srcmode, dstmode;
34152 rtx (*pinsr)(rtx, rtx, rtx, rtx);
34154 srcmode = mode_for_size (size, MODE_INT, 0);
34156 switch (srcmode)
34158 case QImode:
34159 if (!TARGET_SSE4_1)
34160 return false;
34161 dstmode = V16QImode;
34162 pinsr = gen_sse4_1_pinsrb;
34163 break;
34165 case HImode:
34166 if (!TARGET_SSE2)
34167 return false;
34168 dstmode = V8HImode;
34169 pinsr = gen_sse2_pinsrw;
34170 break;
34172 case SImode:
34173 if (!TARGET_SSE4_1)
34174 return false;
34175 dstmode = V4SImode;
34176 pinsr = gen_sse4_1_pinsrd;
34177 break;
34179 case DImode:
34180 gcc_assert (TARGET_64BIT);
34181 if (!TARGET_SSE4_1)
34182 return false;
34183 dstmode = V2DImode;
34184 pinsr = gen_sse4_1_pinsrq;
34185 break;
34187 default:
34188 return false;
34191 dst = gen_lowpart (dstmode, dst);
34192 src = gen_lowpart (srcmode, src);
34194 pos /= size;
34196 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
34197 return true;
34200 default:
34201 return false;
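/* Editorial sketch, not part of this file: the pinsr patterns emitted by
   ix86_expand_pinsr correspond to the SSE2/SSE4.1 element-insert
   intrinsics.  For example, inserting a 32-bit value at bit position 64
   of a V4SImode destination gives pos / size == 2, i.e. the
   GEN_INT (1 << 2) selector passed to the pinsr expander above.  A
   user-level equivalent, assuming SSE4.1 (-msse4.1): */

#include <smmintrin.h>

__m128i
insert_lane2 (__m128i v, int x)
{
  /* Expands to pinsrd $2, ... on SSE4.1 targets.  */
  return _mm_insert_epi32 (v, x, 2);
}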
34205 /* This function returns the calling-ABI-specific va_list type node.
34206 It returns the FNDECL-specific va_list type. */
34208 static tree
34209 ix86_fn_abi_va_list (tree fndecl)
34211 if (!TARGET_64BIT)
34212 return va_list_type_node;
34213 gcc_assert (fndecl != NULL_TREE);
34215 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
34216 return ms_va_list_type_node;
34217 else
34218 return sysv_va_list_type_node;
34221 /* Returns the canonical va_list type specified by TYPE. If there
34222 is no valid TYPE provided, it returns NULL_TREE. */
34224 static tree
34225 ix86_canonical_va_list_type (tree type)
34227 tree wtype, htype;
34229 /* Resolve references and pointers to va_list type. */
34230 if (TREE_CODE (type) == MEM_REF)
34231 type = TREE_TYPE (type);
34232 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
34233 type = TREE_TYPE (type);
34234 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
34235 type = TREE_TYPE (type);
34237 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
34239 wtype = va_list_type_node;
34240 gcc_assert (wtype != NULL_TREE);
34241 htype = type;
34242 if (TREE_CODE (wtype) == ARRAY_TYPE)
34244 /* If va_list is an array type, the argument may have decayed
34245 to a pointer type, e.g. by being passed to another function.
34246 In that case, unwrap both types so that we can compare the
34247 underlying records. */
34248 if (TREE_CODE (htype) == ARRAY_TYPE
34249 || POINTER_TYPE_P (htype))
34251 wtype = TREE_TYPE (wtype);
34252 htype = TREE_TYPE (htype);
34255 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34256 return va_list_type_node;
34257 wtype = sysv_va_list_type_node;
34258 gcc_assert (wtype != NULL_TREE);
34259 htype = type;
34260 if (TREE_CODE (wtype) == ARRAY_TYPE)
34262 /* If va_list is an array type, the argument may have decayed
34263 to a pointer type, e.g. by being passed to another function.
34264 In that case, unwrap both types so that we can compare the
34265 underlying records. */
34266 if (TREE_CODE (htype) == ARRAY_TYPE
34267 || POINTER_TYPE_P (htype))
34269 wtype = TREE_TYPE (wtype);
34270 htype = TREE_TYPE (htype);
34273 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34274 return sysv_va_list_type_node;
34275 wtype = ms_va_list_type_node;
34276 gcc_assert (wtype != NULL_TREE);
34277 htype = type;
34278 if (TREE_CODE (wtype) == ARRAY_TYPE)
34280 /* If va_list is an array type, the argument may have decayed
34281 to a pointer type, e.g. by being passed to another function.
34282 In that case, unwrap both types so that we can compare the
34283 underlying records. */
34284 if (TREE_CODE (htype) == ARRAY_TYPE
34285 || POINTER_TYPE_P (htype))
34287 wtype = TREE_TYPE (wtype);
34288 htype = TREE_TYPE (htype);
34291 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34292 return ms_va_list_type_node;
34293 return NULL_TREE;
34295 return std_canonical_va_list_type (type);
34298 /* Iterate through the target-specific builtin types for va_list.
34299 IDX denotes the iterator, *PTREE is set to the result type of
34300 the va_list builtin, and *PNAME to its internal type.
34301 Returns zero if there is no element for this index, otherwise
34302 IDX should be increased upon the next call.
34303 Note, do not iterate a base builtin's name like __builtin_va_list.
34304 Used from c_common_nodes_and_builtins. */
34306 static int
34307 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
34309 if (TARGET_64BIT)
34311 switch (idx)
34313 default:
34314 break;
34316 case 0:
34317 *ptree = ms_va_list_type_node;
34318 *pname = "__builtin_ms_va_list";
34319 return 1;
34321 case 1:
34322 *ptree = sysv_va_list_type_node;
34323 *pname = "__builtin_sysv_va_list";
34324 return 1;
34328 return 0;
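/* Editorial sketch, not part of this file: on x86-64 the two calling
   conventions keep distinct va_list types, which is why the hooks above
   expose the type names "__builtin_ms_va_list" and "__builtin_sysv_va_list".
   A variadic function using the ms_abi attribute walks its arguments
   roughly like this (a hedged illustration; __builtin_ms_va_start and
   __builtin_ms_va_end are assumed here as the MS-ABI counterparts of the
   standard va_start/va_end builtins): */

int __attribute__ ((ms_abi))
ms_sum (int count, ...)
{
  __builtin_ms_va_list ap;
  int i, total = 0;

  __builtin_ms_va_start (ap, count);
  for (i = 0; i < count; ++i)
    total += __builtin_va_arg (ap, int);
  __builtin_ms_va_end (ap);
  return total;
}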
34331 #undef TARGET_SCHED_DISPATCH
34332 #define TARGET_SCHED_DISPATCH has_dispatch
34333 #undef TARGET_SCHED_DISPATCH_DO
34334 #define TARGET_SCHED_DISPATCH_DO do_dispatch
34336 /* The size of the dispatch window is the total number of bytes of
34337 object code allowed in a window. */
34338 #define DISPATCH_WINDOW_SIZE 16
34340 /* Number of dispatch windows considered for scheduling. */
34341 #define MAX_DISPATCH_WINDOWS 3
34343 /* Maximum number of instructions in a window. */
34344 #define MAX_INSN 4
34346 /* Maximum number of immediate operands in a window. */
34347 #define MAX_IMM 4
34349 /* Maximum number of immediate bits allowed in a window. */
34350 #define MAX_IMM_SIZE 128
34352 /* Maximum number of 32 bit immediates allowed in a window. */
34353 #define MAX_IMM_32 4
34355 /* Maximum number of 64 bit immediates allowed in a window. */
34356 #define MAX_IMM_64 2
34358 /* Maximum total of loads or prefetches allowed in a window. */
34359 #define MAX_LOAD 2
34361 /* Maximum total of stores allowed in a window. */
34362 #define MAX_STORE 1
34364 #undef BIG
34365 #define BIG 100
34368 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
34369 enum dispatch_group {
34370 disp_no_group = 0,
34371 disp_load,
34372 disp_store,
34373 disp_load_store,
34374 disp_prefetch,
34375 disp_imm,
34376 disp_imm_32,
34377 disp_imm_64,
34378 disp_branch,
34379 disp_cmp,
34380 disp_jcc,
34381 disp_last
34384 /* Number of allowable groups in a dispatch window. It is an array
34385 indexed by dispatch_group enum. 100 is used as a big number,
34386 because the number of these kinds of operations does not have any
34387 effect in a dispatch window, but we need them for other reasons in
34388 the table. */
34389 static unsigned int num_allowable_groups[disp_last] = {
34390 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
34393 char group_name[disp_last + 1][16] = {
34394 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
34395 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
34396 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
34399 /* Instruction path. */
34400 enum insn_path {
34401 no_path = 0,
34402 path_single, /* Single micro op. */
34403 path_double, /* Double micro op. */
34404 path_multi, /* Instructions with more than 2 micro ops. */
34405 last_path
34408 /* sched_insn_info defines a window to the instructions scheduled in
34409 the basic block. It contains a pointer to the insn_info table and
34410 the instruction scheduled.
34412 Windows are allocated for each basic block and are linked
34413 together. */
34414 typedef struct sched_insn_info_s {
34415 rtx insn;
34416 enum dispatch_group group;
34417 enum insn_path path;
34418 int byte_len;
34419 int imm_bytes;
34420 } sched_insn_info;
34422 /* Linked list of dispatch windows. This is a two way list of
34423 dispatch windows of a basic block. It contains information about
34424 the number of uops in the window and the total number of
34425 instructions and of bytes in the object code for this dispatch
34426 window. */
34427 typedef struct dispatch_windows_s {
34428 int num_insn; /* Number of insn in the window. */
34429 int num_uops; /* Number of uops in the window. */
34430 int window_size; /* Number of bytes in the window. */
34431 int window_num; /* Window number, 0 or 1. */
34432 int num_imm; /* Number of immediates in an insn. */
34433 int num_imm_32; /* Number of 32 bit immediates in an insn. */
34434 int num_imm_64; /* Number of 64 bit immediates in an insn. */
34435 int imm_size; /* Total immediates in the window. */
34436 int num_loads; /* Total memory loads in the window. */
34437 int num_stores; /* Total memory stores in the window. */
34438 int violation; /* Violation exists in window. */
34439 sched_insn_info *window; /* Pointer to the window. */
34440 struct dispatch_windows_s *next;
34441 struct dispatch_windows_s *prev;
34442 } dispatch_windows;
34444 /* Immediate values used in an insn. */
34445 typedef struct imm_info_s
34447 int imm;
34448 int imm32;
34449 int imm64;
34450 } imm_info;
34452 static dispatch_windows *dispatch_window_list;
34453 static dispatch_windows *dispatch_window_list1;
34455 /* Get dispatch group of insn. */
34457 static enum dispatch_group
34458 get_mem_group (rtx insn)
34460 enum attr_memory memory;
34462 if (INSN_CODE (insn) < 0)
34463 return disp_no_group;
34464 memory = get_attr_memory (insn);
34465 if (memory == MEMORY_STORE)
34466 return disp_store;
34468 if (memory == MEMORY_LOAD)
34469 return disp_load;
34471 if (memory == MEMORY_BOTH)
34472 return disp_load_store;
34474 return disp_no_group;
34477 /* Return true if insn is a compare instruction. */
34479 static bool
34480 is_cmp (rtx insn)
34482 enum attr_type type;
34484 type = get_attr_type (insn);
34485 return (type == TYPE_TEST
34486 || type == TYPE_ICMP
34487 || type == TYPE_FCMP
34488 || GET_CODE (PATTERN (insn)) == COMPARE);
34491 /* Return true if a dispatch violation was encountered. */
34493 static bool
34494 dispatch_violation (void)
34496 if (dispatch_window_list->next)
34497 return dispatch_window_list->next->violation;
34498 return dispatch_window_list->violation;
34501 /* Return true if insn is a branch instruction. */
34503 static bool
34504 is_branch (rtx insn)
34506 return (CALL_P (insn) || JUMP_P (insn));
34509 /* Return true if insn is a prefetch instruction. */
34511 static bool
34512 is_prefetch (rtx insn)
34514 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
34517 /* This function initializes a dispatch window and the list container holding a
34518 pointer to the window. */
34520 static void
34521 init_window (int window_num)
34523 int i;
34524 dispatch_windows *new_list;
34526 if (window_num == 0)
34527 new_list = dispatch_window_list;
34528 else
34529 new_list = dispatch_window_list1;
34531 new_list->num_insn = 0;
34532 new_list->num_uops = 0;
34533 new_list->window_size = 0;
34534 new_list->next = NULL;
34535 new_list->prev = NULL;
34536 new_list->window_num = window_num;
34537 new_list->num_imm = 0;
34538 new_list->num_imm_32 = 0;
34539 new_list->num_imm_64 = 0;
34540 new_list->imm_size = 0;
34541 new_list->num_loads = 0;
34542 new_list->num_stores = 0;
34543 new_list->violation = false;
34545 for (i = 0; i < MAX_INSN; i++)
34547 new_list->window[i].insn = NULL;
34548 new_list->window[i].group = disp_no_group;
34549 new_list->window[i].path = no_path;
34550 new_list->window[i].byte_len = 0;
34551 new_list->window[i].imm_bytes = 0;
34553 return;
34556 /* This function allocates and initializes a dispatch window and the
34557 list container holding a pointer to the window. */
34559 static dispatch_windows *
34560 allocate_window (void)
34562 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
34563 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
34565 return new_list;
34568 /* This routine initializes the dispatch scheduling information. It
34569 initiates building dispatch scheduler tables and constructs the
34570 first dispatch window. */
34572 static void
34573 init_dispatch_sched (void)
34575 /* Allocate a dispatch list and a window. */
34576 dispatch_window_list = allocate_window ();
34577 dispatch_window_list1 = allocate_window ();
34578 init_window (0);
34579 init_window (1);
34582 /* This function returns true if a branch is detected. End of a basic block
34583 does not have to be a branch, but here we assume only branches end a
34584 window. */
34586 static bool
34587 is_end_basic_block (enum dispatch_group group)
34589 return group == disp_branch;
34592 /* This function is called when the end of a window's processing is reached. */
34594 static void
34595 process_end_window (void)
34597 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
34598 if (dispatch_window_list->next)
34600 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
34601 gcc_assert (dispatch_window_list->window_size
34602 + dispatch_window_list1->window_size <= 48);
34603 init_window (1);
34605 init_window (0);
34608 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
34609 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
34610 for 48 bytes of instructions. Note that these windows are not dispatch
34611 windows whose sizes are DISPATCH_WINDOW_SIZE. */
34613 static dispatch_windows *
34614 allocate_next_window (int window_num)
34616 if (window_num == 0)
34618 if (dispatch_window_list->next)
34619 init_window (1);
34620 init_window (0);
34621 return dispatch_window_list;
34624 dispatch_window_list->next = dispatch_window_list1;
34625 dispatch_window_list1->prev = dispatch_window_list;
34627 return dispatch_window_list1;
34630 /* Increment the number of immediate operands of an instruction. */
34632 static int
34633 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
34635 if (*in_rtx == 0)
34636 return 0;
34638 switch (GET_CODE (*in_rtx))
34640 case CONST:
34641 case SYMBOL_REF:
34642 case CONST_INT:
34643 (imm_values->imm)++;
34644 if (x86_64_immediate_operand (*in_rtx, SImode))
34645 (imm_values->imm32)++;
34646 else
34647 (imm_values->imm64)++;
34648 break;
34650 case CONST_DOUBLE:
34651 (imm_values->imm)++;
34652 (imm_values->imm64)++;
34653 break;
34655 case CODE_LABEL:
34656 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
34658 (imm_values->imm)++;
34659 (imm_values->imm32)++;
34661 break;
34663 default:
34664 break;
34667 return 0;
34670 /* Compute number of immediate operands of an instruction. */
34672 static void
34673 find_constant (rtx in_rtx, imm_info *imm_values)
34675 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
34676 (rtx_function) find_constant_1, (void *) imm_values);
34679 /* Return total size of immediate operands of an instruction along with number
34680 of corresponding immediate operands. It initializes its parameters to zero
34681 before calling FIND_CONSTANT.
34682 INSN is the input instruction. IMM is the total of immediates.
34683 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
34684 bit immediates. */
34686 static int
34687 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
34689 imm_info imm_values = {0, 0, 0};
34691 find_constant (insn, &imm_values);
34692 *imm = imm_values.imm;
34693 *imm32 = imm_values.imm32;
34694 *imm64 = imm_values.imm64;
34695 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
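/* Editorial sketch, not part of this file: find_constant_1 counts a
   constant as a 32-bit immediate when x86_64_immediate_operand accepts
   it in SImode, which for plain integers roughly means it survives
   sign-extension from 32 bits; get_num_immediates then charges 4 bytes
   per 32-bit immediate and 8 bytes per 64-bit one.  A standalone
   approximation of that accounting (fits_sign_extended_32 and imm_bytes
   are hypothetical helpers): */

#include <stdint.h>
#include <stdbool.h>

static bool
fits_sign_extended_32 (int64_t val)
{
  return val == (int64_t) (int32_t) val;
}

static int
imm_bytes (const int64_t *vals, int n)
{
  int i, bytes = 0;
  for (i = 0; i < n; ++i)
    bytes += fits_sign_extended_32 (vals[i]) ? 4 : 8;
  return bytes;
}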
34698 /* This function indicates whether an instruction has any immediate
34699 operands. */
34701 static bool
34702 has_immediate (rtx insn)
34704 int num_imm_operand;
34705 int num_imm32_operand;
34706 int num_imm64_operand;
34708 if (insn)
34709 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34710 &num_imm64_operand);
34711 return false;
34714 /* Return single or double path for instructions. */
34716 static enum insn_path
34717 get_insn_path (rtx insn)
34719 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
34721 if ((int)path == 0)
34722 return path_single;
34724 if ((int)path == 1)
34725 return path_double;
34727 return path_multi;
34730 /* Return insn dispatch group. */
34732 static enum dispatch_group
34733 get_insn_group (rtx insn)
34735 enum dispatch_group group = get_mem_group (insn);
34736 if (group)
34737 return group;
34739 if (is_branch (insn))
34740 return disp_branch;
34742 if (is_cmp (insn))
34743 return disp_cmp;
34745 if (has_immediate (insn))
34746 return disp_imm;
34748 if (is_prefetch (insn))
34749 return disp_prefetch;
34751 return disp_no_group;
34754 /* Count number of GROUP restricted instructions in a dispatch
34755 window WINDOW_LIST. */
34757 static int
34758 count_num_restricted (rtx insn, dispatch_windows *window_list)
34760 enum dispatch_group group = get_insn_group (insn);
34761 int imm_size;
34762 int num_imm_operand;
34763 int num_imm32_operand;
34764 int num_imm64_operand;
34766 if (group == disp_no_group)
34767 return 0;
34769 if (group == disp_imm)
34771 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34772 &num_imm64_operand);
34773 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
34774 || num_imm_operand + window_list->num_imm > MAX_IMM
34775 || (num_imm32_operand > 0
34776 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
34777 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
34778 || (num_imm64_operand > 0
34779 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
34780 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
34781 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
34782 && num_imm64_operand > 0
34783 && ((window_list->num_imm_64 > 0
34784 && window_list->num_insn >= 2)
34785 || window_list->num_insn >= 3)))
34786 return BIG;
34788 return 1;
34791 if ((group == disp_load_store
34792 && (window_list->num_loads >= MAX_LOAD
34793 || window_list->num_stores >= MAX_STORE))
34794 || ((group == disp_load
34795 || group == disp_prefetch)
34796 && window_list->num_loads >= MAX_LOAD)
34797 || (group == disp_store
34798 && window_list->num_stores >= MAX_STORE))
34799 return BIG;
34801 return 1;
34804 /* This function returns true if insn satisfies dispatch rules on the
34805 last window scheduled. */
34807 static bool
34808 fits_dispatch_window (rtx insn)
34810 dispatch_windows *window_list = dispatch_window_list;
34811 dispatch_windows *window_list_next = dispatch_window_list->next;
34812 unsigned int num_restrict;
34813 enum dispatch_group group = get_insn_group (insn);
34814 enum insn_path path = get_insn_path (insn);
34815 int sum;
34817 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
34818 instructions should be given the lowest priority in the
34819 scheduling process in the Haifa scheduler to make sure they will be
34820 scheduled in the same dispatch window as the reference to them. */
34821 if (group == disp_jcc || group == disp_cmp)
34822 return false;
34824 /* Check nonrestricted. */
34825 if (group == disp_no_group || group == disp_branch)
34826 return true;
34828 /* Get last dispatch window. */
34829 if (window_list_next)
34830 window_list = window_list_next;
34832 if (window_list->window_num == 1)
34834 sum = window_list->prev->window_size + window_list->window_size;
34836 if (sum == 32
34837 || (min_insn_size (insn) + sum) >= 48)
34838 /* Window 1 is full. Go for next window. */
34839 return true;
34842 num_restrict = count_num_restricted (insn, window_list);
34844 if (num_restrict > num_allowable_groups[group])
34845 return false;
34847 /* See if it fits in the first window. */
34848 if (window_list->window_num == 0)
34850 /* The first window should have only single and double path
34851 uops. */
34852 if (path == path_double
34853 && (window_list->num_uops + 2) > MAX_INSN)
34854 return false;
34855 else if (path != path_single)
34856 return false;
34858 return true;
34861 /* Add an instruction INSN with NUM_UOPS micro-operations to the
34862 dispatch window WINDOW_LIST. */
34864 static void
34865 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
34867 int byte_len = min_insn_size (insn);
34868 int num_insn = window_list->num_insn;
34869 int imm_size;
34870 sched_insn_info *window = window_list->window;
34871 enum dispatch_group group = get_insn_group (insn);
34872 enum insn_path path = get_insn_path (insn);
34873 int num_imm_operand;
34874 int num_imm32_operand;
34875 int num_imm64_operand;
34877 if (!window_list->violation && group != disp_cmp
34878 && !fits_dispatch_window (insn))
34879 window_list->violation = true;
34881 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34882 &num_imm64_operand);
34884 /* Initialize window with new instruction. */
34885 window[num_insn].insn = insn;
34886 window[num_insn].byte_len = byte_len;
34887 window[num_insn].group = group;
34888 window[num_insn].path = path;
34889 window[num_insn].imm_bytes = imm_size;
34891 window_list->window_size += byte_len;
34892 window_list->num_insn = num_insn + 1;
34893 window_list->num_uops = window_list->num_uops + num_uops;
34894 window_list->imm_size += imm_size;
34895 window_list->num_imm += num_imm_operand;
34896 window_list->num_imm_32 += num_imm32_operand;
34897 window_list->num_imm_64 += num_imm64_operand;
34899 if (group == disp_store)
34900 window_list->num_stores += 1;
34901 else if (group == disp_load
34902 || group == disp_prefetch)
34903 window_list->num_loads += 1;
34904 else if (group == disp_load_store)
34906 window_list->num_stores += 1;
34907 window_list->num_loads += 1;
34911 /* Adds a scheduled instruction, INSN, to the current dispatch window.
34912 If the total bytes of instructions or the number of instructions in
34913 the window exceeds the allowable limit, it allocates a new window. */
34915 static void
34916 add_to_dispatch_window (rtx insn)
34918 int byte_len;
34919 dispatch_windows *window_list;
34920 dispatch_windows *next_list;
34921 dispatch_windows *window0_list;
34922 enum insn_path path;
34923 enum dispatch_group insn_group;
34924 bool insn_fits;
34925 int num_insn;
34926 int num_uops;
34927 int window_num;
34928 int insn_num_uops;
34929 int sum;
34931 if (INSN_CODE (insn) < 0)
34932 return;
34934 byte_len = min_insn_size (insn);
34935 window_list = dispatch_window_list;
34936 next_list = window_list->next;
34937 path = get_insn_path (insn);
34938 insn_group = get_insn_group (insn);
34940 /* Get the last dispatch window. */
34941 if (next_list)
34942 window_list = dispatch_window_list->next;
34944 if (path == path_single)
34945 insn_num_uops = 1;
34946 else if (path == path_double)
34947 insn_num_uops = 2;
34948 else
34949 insn_num_uops = (int) path;
34951 /* If the current window is full, get a new window.
34952 Window number zero is full if MAX_INSN uops are scheduled in it.
34953 Window number one is full if window zero's bytes plus window
34954 one's bytes is 32, or if adding the bytes of the new instruction
34955 brings the total to 48 or more, or if it already has MAX_INSN
34956 instructions in it. */
34957 num_insn = window_list->num_insn;
34958 num_uops = window_list->num_uops;
34959 window_num = window_list->window_num;
34960 insn_fits = fits_dispatch_window (insn);
34962 if (num_insn >= MAX_INSN
34963 || num_uops + insn_num_uops > MAX_INSN
34964 || !(insn_fits))
34966 window_num = ~window_num & 1;
34967 window_list = allocate_next_window (window_num);
34970 if (window_num == 0)
34972 add_insn_window (insn, window_list, insn_num_uops);
34973 if (window_list->num_insn >= MAX_INSN
34974 && insn_group == disp_branch)
34976 process_end_window ();
34977 return;
34980 else if (window_num == 1)
34982 window0_list = window_list->prev;
34983 sum = window0_list->window_size + window_list->window_size;
34984 if (sum == 32
34985 || (byte_len + sum) >= 48)
34987 process_end_window ();
34988 window_list = dispatch_window_list;
34991 add_insn_window (insn, window_list, insn_num_uops);
34993 else
34994 gcc_unreachable ();
34996 if (is_end_basic_block (insn_group))
34998 /* End of basic block is reached; do end-basic-block processing. */
34999 process_end_window ();
35000 return;
35004 /* Print the dispatch window, WINDOW_NUM, to FILE. */
35006 DEBUG_FUNCTION static void
35007 debug_dispatch_window_file (FILE *file, int window_num)
35009 dispatch_windows *list;
35010 int i;
35012 if (window_num == 0)
35013 list = dispatch_window_list;
35014 else
35015 list = dispatch_window_list1;
35017 fprintf (file, "Window #%d:\n", list->window_num);
35018 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
35019 list->num_insn, list->num_uops, list->window_size);
35020 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
35021 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
35023 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
35024 list->num_stores);
35025 fprintf (file, " insn info:\n");
35027 for (i = 0; i < MAX_INSN; i++)
35029 if (!list->window[i].insn)
35030 break;
35031 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
35032 i, group_name[list->window[i].group],
35033 i, (void *)list->window[i].insn,
35034 i, list->window[i].path,
35035 i, list->window[i].byte_len,
35036 i, list->window[i].imm_bytes);
35040 /* Print to stdout a dispatch window. */
35042 DEBUG_FUNCTION void
35043 debug_dispatch_window (int window_num)
35045 debug_dispatch_window_file (stdout, window_num);
35048 /* Print INSN dispatch information to FILE. */
35050 DEBUG_FUNCTION static void
35051 debug_insn_dispatch_info_file (FILE *file, rtx insn)
35053 int byte_len;
35054 enum insn_path path;
35055 enum dispatch_group group;
35056 int imm_size;
35057 int num_imm_operand;
35058 int num_imm32_operand;
35059 int num_imm64_operand;
35061 if (INSN_CODE (insn) < 0)
35062 return;
35064 byte_len = min_insn_size (insn);
35065 path = get_insn_path (insn);
35066 group = get_insn_group (insn);
35067 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
35068 &num_imm64_operand);
35070 fprintf (file, " insn info:\n");
35071 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
35072 group_name[group], path, byte_len);
35073 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
35074 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
35077 /* Print to STDERR the status of the ready list with respect to
35078 dispatch windows. */
35080 DEBUG_FUNCTION void
35081 debug_ready_dispatch (void)
35083 int i;
35084 int no_ready = number_in_ready ();
35086 fprintf (stdout, "Number of ready: %d\n", no_ready);
35088 for (i = 0; i < no_ready; i++)
35089 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
35092 /* This routine is the driver of the dispatch scheduler. */
35094 static void
35095 do_dispatch (rtx insn, int mode)
35097 if (mode == DISPATCH_INIT)
35098 init_dispatch_sched ();
35099 else if (mode == ADD_TO_DISPATCH_WINDOW)
35100 add_to_dispatch_window (insn);
35103 /* Return TRUE if Dispatch Scheduling is supported. */
35105 static bool
35106 has_dispatch (rtx insn, int action)
35108 if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
35109 switch (action)
35111 default:
35112 return false;
35114 case IS_DISPATCH_ON:
35115 return true;
35116 break;
35118 case IS_CMP:
35119 return is_cmp (insn);
35121 case DISPATCH_VIOLATION:
35122 return dispatch_violation ();
35124 case FITS_DISPATCH_WINDOW:
35125 return fits_dispatch_window (insn);
35128 return false;
35131 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
35132 place emms and femms instructions. */
35134 static enum machine_mode
35135 ix86_preferred_simd_mode (enum machine_mode mode)
35137 if (!TARGET_SSE)
35138 return word_mode;
35140 switch (mode)
35142 case QImode:
35143 return V16QImode;
35144 case HImode:
35145 return V8HImode;
35146 case SImode:
35147 return V4SImode;
35148 case DImode:
35149 return V2DImode;
35151 case SFmode:
35152 if (TARGET_AVX && !flag_prefer_avx128)
35153 return V8SFmode;
35154 else
35155 return V4SFmode;
35157 case DFmode:
35158 if (!TARGET_VECTORIZE_DOUBLE)
35159 return word_mode;
35160 else if (TARGET_AVX && !flag_prefer_avx128)
35161 return V4DFmode;
35162 else if (TARGET_SSE2)
35163 return V2DFmode;
35164 /* FALLTHRU */
35166 default:
35167 return word_mode;
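/* Editorial sketch, not part of this file: for SFmode the hook above
   returns V8SFmode when AVX is enabled and 128-bit vectors are not
   preferred, and V4SFmode otherwise, so a loop like the one below is
   vectorized with 256-bit ymm or 128-bit xmm operations accordingly
   (for example -O3 -mavx versus -O3 -mavx -mprefer-avx128; the exact
   option spelling is an assumption of this note). */

void
saxpy (float *restrict y, const float *restrict x, float a, int n)
{
  int i;
  for (i = 0; i < n; ++i)
    y[i] = a * x[i] + y[i];
}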
35171 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
35172 vectors. */
35174 static unsigned int
35175 ix86_autovectorize_vector_sizes (void)
35177 return (TARGET_AVX && !flag_prefer_avx128) ? 32 | 16 : 0;
35180 /* Initialize the GCC target structure. */
35181 #undef TARGET_RETURN_IN_MEMORY
35182 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
35184 #undef TARGET_LEGITIMIZE_ADDRESS
35185 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
35187 #undef TARGET_ATTRIBUTE_TABLE
35188 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
35189 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
35190 # undef TARGET_MERGE_DECL_ATTRIBUTES
35191 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
35192 #endif
35194 #undef TARGET_COMP_TYPE_ATTRIBUTES
35195 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
35197 #undef TARGET_INIT_BUILTINS
35198 #define TARGET_INIT_BUILTINS ix86_init_builtins
35199 #undef TARGET_BUILTIN_DECL
35200 #define TARGET_BUILTIN_DECL ix86_builtin_decl
35201 #undef TARGET_EXPAND_BUILTIN
35202 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
35204 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
35205 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
35206 ix86_builtin_vectorized_function
35208 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
35209 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
35211 #undef TARGET_BUILTIN_RECIPROCAL
35212 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
35214 #undef TARGET_ASM_FUNCTION_EPILOGUE
35215 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
35217 #undef TARGET_ENCODE_SECTION_INFO
35218 #ifndef SUBTARGET_ENCODE_SECTION_INFO
35219 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
35220 #else
35221 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
35222 #endif
35224 #undef TARGET_ASM_OPEN_PAREN
35225 #define TARGET_ASM_OPEN_PAREN ""
35226 #undef TARGET_ASM_CLOSE_PAREN
35227 #define TARGET_ASM_CLOSE_PAREN ""
35229 #undef TARGET_ASM_BYTE_OP
35230 #define TARGET_ASM_BYTE_OP ASM_BYTE
35232 #undef TARGET_ASM_ALIGNED_HI_OP
35233 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
35234 #undef TARGET_ASM_ALIGNED_SI_OP
35235 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
35236 #ifdef ASM_QUAD
35237 #undef TARGET_ASM_ALIGNED_DI_OP
35238 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
35239 #endif
35241 #undef TARGET_PROFILE_BEFORE_PROLOGUE
35242 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
35244 #undef TARGET_ASM_UNALIGNED_HI_OP
35245 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
35246 #undef TARGET_ASM_UNALIGNED_SI_OP
35247 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
35248 #undef TARGET_ASM_UNALIGNED_DI_OP
35249 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
35251 #undef TARGET_PRINT_OPERAND
35252 #define TARGET_PRINT_OPERAND ix86_print_operand
35253 #undef TARGET_PRINT_OPERAND_ADDRESS
35254 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
35255 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
35256 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
35257 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
35258 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
35260 #undef TARGET_SCHED_INIT_GLOBAL
35261 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
35262 #undef TARGET_SCHED_ADJUST_COST
35263 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
35264 #undef TARGET_SCHED_ISSUE_RATE
35265 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
35266 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
35267 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
35268 ia32_multipass_dfa_lookahead
35270 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
35271 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
35273 #ifdef HAVE_AS_TLS
35274 #undef TARGET_HAVE_TLS
35275 #define TARGET_HAVE_TLS true
35276 #endif
35277 #undef TARGET_CANNOT_FORCE_CONST_MEM
35278 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
35279 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
35280 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
35282 #undef TARGET_DELEGITIMIZE_ADDRESS
35283 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
35285 #undef TARGET_MS_BITFIELD_LAYOUT_P
35286 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
35288 #if TARGET_MACHO
35289 #undef TARGET_BINDS_LOCAL_P
35290 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
35291 #endif
35292 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
35293 #undef TARGET_BINDS_LOCAL_P
35294 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
35295 #endif
35297 #undef TARGET_ASM_OUTPUT_MI_THUNK
35298 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
35299 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
35300 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
35302 #undef TARGET_ASM_FILE_START
35303 #define TARGET_ASM_FILE_START x86_file_start
35305 #undef TARGET_DEFAULT_TARGET_FLAGS
35306 #define TARGET_DEFAULT_TARGET_FLAGS \
35307 (TARGET_DEFAULT \
35308 | TARGET_SUBTARGET_DEFAULT \
35309 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
35311 #undef TARGET_HANDLE_OPTION
35312 #define TARGET_HANDLE_OPTION ix86_handle_option
35314 #undef TARGET_OPTION_OVERRIDE
35315 #define TARGET_OPTION_OVERRIDE ix86_option_override
35316 #undef TARGET_OPTION_OPTIMIZATION_TABLE
35317 #define TARGET_OPTION_OPTIMIZATION_TABLE ix86_option_optimization_table
35318 #undef TARGET_OPTION_INIT_STRUCT
35319 #define TARGET_OPTION_INIT_STRUCT ix86_option_init_struct
35321 #undef TARGET_REGISTER_MOVE_COST
35322 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
35323 #undef TARGET_MEMORY_MOVE_COST
35324 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
35325 #undef TARGET_RTX_COSTS
35326 #define TARGET_RTX_COSTS ix86_rtx_costs
35327 #undef TARGET_ADDRESS_COST
35328 #define TARGET_ADDRESS_COST ix86_address_cost
35330 #undef TARGET_FIXED_CONDITION_CODE_REGS
35331 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
35332 #undef TARGET_CC_MODES_COMPATIBLE
35333 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
35335 #undef TARGET_MACHINE_DEPENDENT_REORG
35336 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
35338 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
35339 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
35341 #undef TARGET_BUILD_BUILTIN_VA_LIST
35342 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
35344 #undef TARGET_ENUM_VA_LIST_P
35345 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
35347 #undef TARGET_FN_ABI_VA_LIST
35348 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
35350 #undef TARGET_CANONICAL_VA_LIST_TYPE
35351 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
35353 #undef TARGET_EXPAND_BUILTIN_VA_START
35354 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
35356 #undef TARGET_MD_ASM_CLOBBERS
35357 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
35359 #undef TARGET_PROMOTE_PROTOTYPES
35360 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
35361 #undef TARGET_STRUCT_VALUE_RTX
35362 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
35363 #undef TARGET_SETUP_INCOMING_VARARGS
35364 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
35365 #undef TARGET_MUST_PASS_IN_STACK
35366 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
35367 #undef TARGET_FUNCTION_ARG_ADVANCE
35368 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
35369 #undef TARGET_FUNCTION_ARG
35370 #define TARGET_FUNCTION_ARG ix86_function_arg
35371 #undef TARGET_FUNCTION_ARG_BOUNDARY
35372 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
35373 #undef TARGET_PASS_BY_REFERENCE
35374 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
35375 #undef TARGET_INTERNAL_ARG_POINTER
35376 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
35377 #undef TARGET_UPDATE_STACK_BOUNDARY
35378 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
35379 #undef TARGET_GET_DRAP_RTX
35380 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
35381 #undef TARGET_STRICT_ARGUMENT_NAMING
35382 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
35383 #undef TARGET_STATIC_CHAIN
35384 #define TARGET_STATIC_CHAIN ix86_static_chain
35385 #undef TARGET_TRAMPOLINE_INIT
35386 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
35387 #undef TARGET_RETURN_POPS_ARGS
35388 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
35390 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
35391 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
35393 #undef TARGET_SCALAR_MODE_SUPPORTED_P
35394 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
35396 #undef TARGET_VECTOR_MODE_SUPPORTED_P
35397 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
35399 #undef TARGET_C_MODE_FOR_SUFFIX
35400 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
35402 #ifdef HAVE_AS_TLS
35403 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
35404 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
35405 #endif
35407 #ifdef SUBTARGET_INSERT_ATTRIBUTES
35408 #undef TARGET_INSERT_ATTRIBUTES
35409 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
35410 #endif
35412 #undef TARGET_MANGLE_TYPE
35413 #define TARGET_MANGLE_TYPE ix86_mangle_type
35415 #undef TARGET_STACK_PROTECT_FAIL
35416 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
35418 #undef TARGET_SUPPORTS_SPLIT_STACK
35419 #define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
35421 #undef TARGET_FUNCTION_VALUE
35422 #define TARGET_FUNCTION_VALUE ix86_function_value
35424 #undef TARGET_FUNCTION_VALUE_REGNO_P
35425 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
35427 #undef TARGET_SECONDARY_RELOAD
35428 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
35430 #undef TARGET_PREFERRED_RELOAD_CLASS
35431 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
35432 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
35433 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
35434 #undef TARGET_CLASS_LIKELY_SPILLED_P
35435 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
35437 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
35438 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
35439 ix86_builtin_vectorization_cost
35440 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
35441 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
35442 ix86_vectorize_builtin_vec_perm
35443 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
35444 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
35445 ix86_vectorize_builtin_vec_perm_ok
35446 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
35447 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
35448 ix86_preferred_simd_mode
35449 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
35450 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
35451 ix86_autovectorize_vector_sizes
35453 #undef TARGET_SET_CURRENT_FUNCTION
35454 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
35456 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
35457 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
35459 #undef TARGET_OPTION_SAVE
35460 #define TARGET_OPTION_SAVE ix86_function_specific_save
35462 #undef TARGET_OPTION_RESTORE
35463 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
35465 #undef TARGET_OPTION_PRINT
35466 #define TARGET_OPTION_PRINT ix86_function_specific_print
35468 #undef TARGET_CAN_INLINE_P
35469 #define TARGET_CAN_INLINE_P ix86_can_inline_p
35471 #undef TARGET_EXPAND_TO_RTL_HOOK
35472 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
35474 #undef TARGET_LEGITIMATE_ADDRESS_P
35475 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
35477 #undef TARGET_LEGITIMATE_CONSTANT_P
35478 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
35480 #undef TARGET_FRAME_POINTER_REQUIRED
35481 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
35483 #undef TARGET_CAN_ELIMINATE
35484 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
35486 #undef TARGET_EXTRA_LIVE_ON_ENTRY
35487 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
35489 #undef TARGET_ASM_CODE_END
35490 #define TARGET_ASM_CODE_END ix86_code_end
35492 #undef TARGET_CONDITIONAL_REGISTER_USAGE
35493 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
35495 #if TARGET_MACHO
35496 #undef TARGET_INIT_LIBFUNCS
35497 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
35498 #endif
35500 struct gcc_target targetm = TARGET_INITIALIZER;
35502 #include "gt-i386.h"