re PR tree-optimization/47237 (builtin_apply_args broken WRT local ABI changes.)
[official-gcc.git] / gcc / config / i386 / i386.c
/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "dwarf2out.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)
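
/* Note: the aux field is allocated by alloc_aux_for_blocks at the start of
   move_or_delete_vzeroupper below and released by free_aux_for_blocks, so
   BLOCK_INFO (bb) is only meaningful while that pass is running.  */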

enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register is used in the function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
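
/* move_or_delete_vzeroupper_2 below recovers one of these values from a
   vzeroupper UNSPEC with INTVAL (XVECEXP (pat, 0, 0)) and uses it to decide
   whether that particular vzeroupper can be deleted or must be kept.  */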

/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
	  && REG_P (SET_SRC (set))
	  && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
	= (enum upper_128bits_state *) data;
      *state = used;
    }
}
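
/* check_avx256_stores is written as a note_stores callback: the scan in
   move_or_delete_vzeroupper_2 invokes it via
   note_stores (pat, check_avx256_stores, &state), and the callback upgrades
   *STATE to `used' when a store touches a 256bit AVX register.  */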

/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
			     enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
		 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
		 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
	     bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
	{
	  if (!vzeroupper_insn)
	    continue;

	  if (PREV_INSN (insn) != vzeroupper_insn)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Move vzeroupper after:\n");
		  print_rtl_single (dump_file, PREV_INSN (insn));
		  fprintf (dump_file, "before:\n");
		  print_rtl_single (dump_file, insn);
		}
	      reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
				  PREV_INSN (insn));
	    }
	  vzeroupper_insn = NULL_RTX;
	  continue;
	}

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
	  && XINT (pat, 1) == UNSPECV_VZEROUPPER)
	{
	  if (dump_file)
	    {
	      /* Found vzeroupper intrinsic.  */
	      fprintf (dump_file, "Found vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	}
      else
	{
	  /* Check insn for vzeroall intrinsic.  */
	  if (GET_CODE (pat) == PARALLEL
	      && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
	      && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
	    {
	      state = unused;
	      unchanged = false;

	      /* Delete pending vzeroupper insertion.  */
	      if (vzeroupper_insn)
		{
		  delete_insn (vzeroupper_insn);
		  vzeroupper_insn = NULL_RTX;
		}
	    }
	  else if (state != used)
	    {
	      note_stores (pat, check_avx256_stores, &state);
	      if (state == used)
		unchanged = false;
	    }
	  continue;
	}

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
	{
	  /* Since the upper 128bits are cleared, callee must not pass
	     256bit AVX register.  We only need to check if callee
	     returns 256bit AVX register.  */
	  if (avx256 == callee_return_avx256)
	    {
	      state = used;
	      unchanged = false;
	    }

	  /* Remove unnecessary vzeroupper since upper 128bits are
	     cleared.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	  delete_insn (insn);
	}
      else
	{
	  /* Set state to UNUSED if callee doesn't return 256bit AVX
	     register.  */
	  if (avx256 != callee_return_pass_avx256)
	    state = unused;

	  if (avx256 == callee_return_pass_avx256
	      || avx256 == callee_pass_avx256)
	    {
	      /* Must remove vzeroupper since callee passes in 256bit
		 AVX register.  */
	      if (dump_file)
		{
		  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
		  print_rtl_single (dump_file, insn);
		}
	      delete_insn (insn);
	    }
	  else
	    {
	      vzeroupper_insn = insn;
	      unchanged = false;
	    }
	}
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
	     bb->index, unchanged ? "unchanged" : "changed",
	     state);
}
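
/* Illustrative summary, not taken from the original sources: when the upper
   128 bits are known to be unused on entry, a vzeroupper found in the block
   is deleted as redundant; otherwise it is kept pending and, once a jump or
   call is reached, reordered to sit immediately before that jump/call.  A
   vzeroupper whose callee receives 256bit AVX arguments is deleted instead,
   since clearing the upper halves there would clobber the arguments.  */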

/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
	continue;
      switch (BLOCK_INFO (e->src)->state)
	{
	case unknown:
	  if (!unknown_is_unused)
	    seen_unknown = true;
	case unused:
	  break;
	case used:
	  state = used;
	  goto done;
	}
    }

  if (seen_unknown)
    state = unknown;

 done:
  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
	cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
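
/* The predecessor walk above is effectively a meet operation: any
   predecessor in the `used' state forces `used'; otherwise a predecessor
   still in `unknown' (when UNKNOWN_IS_UNUSED is false) yields `unknown';
   only if every predecessor has resolved to `unused' does the block start
   in the `unused' state.  */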

/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
				   cfun->machine->caller_pass_avx256_p
				   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
	move_or_delete_vzeroupper_1 (bb, false);
	fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
	{
	  bb = (basic_block) fibheap_extract_min (worklist);
	  RESET_BIT (in_worklist, bb->index);
	  gcc_assert (!TEST_BIT (visited, bb->index));
	  if (!TEST_BIT (visited, bb->index))
	    {
	      edge_iterator ei;

	      SET_BIT (visited, bb->index);

	      if (move_or_delete_vzeroupper_1 (bb, false))
		FOR_EACH_EDGE (e, ei, bb->succs)
		  {
		    if (e->dest == EXIT_BLOCK_PTR
			|| BLOCK_INFO (e->dest)->processed)
		      continue;

		    if (TEST_BIT (visited, e->dest->index))
		      {
			if (!TEST_BIT (in_pending, e->dest->index))
			  {
			    /* Send E->DEST to next round.  */
			    SET_BIT (in_pending, e->dest->index);
			    fibheap_insert (pending,
					    bb_order[e->dest->index],
					    e->dest);
			  }
		      }
		    else if (!TEST_BIT (in_worklist, e->dest->index))
		      {
			/* Add E->DEST to current round.  */
			SET_BIT (in_worklist, e->dest->index);
			fibheap_insert (worklist, bb_order[e->dest->index],
					e->dest);
		      }
		  }
	    }
	}

      if (!cfun->machine->rescan_vzeroupper_p)
	break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
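
/* The driver above is a standard iterative data-flow solver: blocks are
   visited in reverse completion order via two fibonacci heaps (the current
   `worklist' and the next-round `pending'), and iteration stops once no
   block's exit state changed to `used' (rescan_vzeroupper_p stays clear).
   The final FOR_EACH_BB pass with UNKNOWN_IS_UNUSED set resolves any blocks
   whose predecessors never settled out of `unknown'.  */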

static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
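
/* Worked example of the size-cost scale assumed above: with
   COSTS_N_INSNS (N) expanding to (N) * 4, an add is charged
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), i.e. the 2-byte add is the
   unit cost, while a 6-byte instruction would be charged
   COSTS_N_BYTES (6) == 12 == COSTS_N_INSNS (3) when optimizing for size.  */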
509 const
510 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
511 COSTS_N_BYTES (2), /* cost of an add instruction */
512 COSTS_N_BYTES (3), /* cost of a lea instruction */
513 COSTS_N_BYTES (2), /* variable shift costs */
514 COSTS_N_BYTES (3), /* constant shift costs */
515 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
516 COSTS_N_BYTES (3), /* HI */
517 COSTS_N_BYTES (3), /* SI */
518 COSTS_N_BYTES (3), /* DI */
519 COSTS_N_BYTES (5)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
522 COSTS_N_BYTES (3), /* HI */
523 COSTS_N_BYTES (3), /* SI */
524 COSTS_N_BYTES (3), /* DI */
525 COSTS_N_BYTES (5)}, /* other */
526 COSTS_N_BYTES (3), /* cost of movsx */
527 COSTS_N_BYTES (3), /* cost of movzx */
528 0, /* "large" insn */
529 2, /* MOVE_RATIO */
530 2, /* cost for loading QImode using movzbl */
531 {2, 2, 2}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 2, 2}, /* cost of storing integer registers */
535 2, /* cost of reg,reg fld/fst */
536 {2, 2, 2}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {2, 2, 2}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 3, /* cost of moving MMX register */
541 {3, 3}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {3, 3}, /* cost of storing MMX registers
544 in SImode and DImode */
545 3, /* cost of moving SSE register */
546 {3, 3, 3}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {3, 3, 3}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 3, /* MMX or SSE register to integer */
551 0, /* size of l1 cache */
552 0, /* size of l2 cache */
553 0, /* size of prefetch block */
554 0, /* number of parallel prefetches */
555 2, /* Branch cost */
556 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
557 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
558 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
559 COSTS_N_BYTES (2), /* cost of FABS instruction. */
560 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
561 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
562 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
563 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
564 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
565 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
566 1, /* scalar_stmt_cost. */
567 1, /* scalar load_cost. */
568 1, /* scalar_store_cost. */
569 1, /* vec_stmt_cost. */
570 1, /* vec_to_scalar_cost. */
571 1, /* scalar_to_vec_cost. */
572 1, /* vec_align_load_cost. */
573 1, /* vec_unalign_load_cost. */
574 1, /* vec_store_cost. */
575 1, /* cond_taken_branch_cost. */
576 1, /* cond_not_taken_branch_cost. */
579 /* Processor costs (relative to an add) */
580 static const
581 struct processor_costs i386_cost = { /* 386 specific costs */
582 COSTS_N_INSNS (1), /* cost of an add instruction */
583 COSTS_N_INSNS (1), /* cost of a lea instruction */
584 COSTS_N_INSNS (3), /* variable shift costs */
585 COSTS_N_INSNS (2), /* constant shift costs */
586 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
587 COSTS_N_INSNS (6), /* HI */
588 COSTS_N_INSNS (6), /* SI */
589 COSTS_N_INSNS (6), /* DI */
590 COSTS_N_INSNS (6)}, /* other */
591 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
592 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
593 COSTS_N_INSNS (23), /* HI */
594 COSTS_N_INSNS (23), /* SI */
595 COSTS_N_INSNS (23), /* DI */
596 COSTS_N_INSNS (23)}, /* other */
597 COSTS_N_INSNS (3), /* cost of movsx */
598 COSTS_N_INSNS (2), /* cost of movzx */
599 15, /* "large" insn */
600 3, /* MOVE_RATIO */
601 4, /* cost for loading QImode using movzbl */
602 {2, 4, 2}, /* cost of loading integer registers
603 in QImode, HImode and SImode.
604 Relative to reg-reg move (2). */
605 {2, 4, 2}, /* cost of storing integer registers */
606 2, /* cost of reg,reg fld/fst */
607 {8, 8, 8}, /* cost of loading fp registers
608 in SFmode, DFmode and XFmode */
609 {8, 8, 8}, /* cost of storing fp registers
610 in SFmode, DFmode and XFmode */
611 2, /* cost of moving MMX register */
612 {4, 8}, /* cost of loading MMX registers
613 in SImode and DImode */
614 {4, 8}, /* cost of storing MMX registers
615 in SImode and DImode */
616 2, /* cost of moving SSE register */
617 {4, 8, 16}, /* cost of loading SSE registers
618 in SImode, DImode and TImode */
619 {4, 8, 16}, /* cost of storing SSE registers
620 in SImode, DImode and TImode */
621 3, /* MMX or SSE register to integer */
622 0, /* size of l1 cache */
623 0, /* size of l2 cache */
624 0, /* size of prefetch block */
625 0, /* number of parallel prefetches */
626 1, /* Branch cost */
627 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
628 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
629 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
630 COSTS_N_INSNS (22), /* cost of FABS instruction. */
631 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
632 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
633 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
634 DUMMY_STRINGOP_ALGS},
635 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
636 DUMMY_STRINGOP_ALGS},
637 1, /* scalar_stmt_cost. */
638 1, /* scalar load_cost. */
639 1, /* scalar_store_cost. */
640 1, /* vec_stmt_cost. */
641 1, /* vec_to_scalar_cost. */
642 1, /* scalar_to_vec_cost. */
643 1, /* vec_align_load_cost. */
644 2, /* vec_unalign_load_cost. */
645 1, /* vec_store_cost. */
646 3, /* cond_taken_branch_cost. */
647 1, /* cond_not_taken_branch_cost. */
650 static const
651 struct processor_costs i486_cost = { /* 486 specific costs */
652 COSTS_N_INSNS (1), /* cost of an add instruction */
653 COSTS_N_INSNS (1), /* cost of a lea instruction */
654 COSTS_N_INSNS (3), /* variable shift costs */
655 COSTS_N_INSNS (2), /* constant shift costs */
656 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
657 COSTS_N_INSNS (12), /* HI */
658 COSTS_N_INSNS (12), /* SI */
659 COSTS_N_INSNS (12), /* DI */
660 COSTS_N_INSNS (12)}, /* other */
661 1, /* cost of multiply per each bit set */
662 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
663 COSTS_N_INSNS (40), /* HI */
664 COSTS_N_INSNS (40), /* SI */
665 COSTS_N_INSNS (40), /* DI */
666 COSTS_N_INSNS (40)}, /* other */
667 COSTS_N_INSNS (3), /* cost of movsx */
668 COSTS_N_INSNS (2), /* cost of movzx */
669 15, /* "large" insn */
670 3, /* MOVE_RATIO */
671 4, /* cost for loading QImode using movzbl */
672 {2, 4, 2}, /* cost of loading integer registers
673 in QImode, HImode and SImode.
674 Relative to reg-reg move (2). */
675 {2, 4, 2}, /* cost of storing integer registers */
676 2, /* cost of reg,reg fld/fst */
677 {8, 8, 8}, /* cost of loading fp registers
678 in SFmode, DFmode and XFmode */
679 {8, 8, 8}, /* cost of storing fp registers
680 in SFmode, DFmode and XFmode */
681 2, /* cost of moving MMX register */
682 {4, 8}, /* cost of loading MMX registers
683 in SImode and DImode */
684 {4, 8}, /* cost of storing MMX registers
685 in SImode and DImode */
686 2, /* cost of moving SSE register */
687 {4, 8, 16}, /* cost of loading SSE registers
688 in SImode, DImode and TImode */
689 {4, 8, 16}, /* cost of storing SSE registers
690 in SImode, DImode and TImode */
691 3, /* MMX or SSE register to integer */
692 4, /* size of l1 cache. 486 has 8kB cache
693 shared for code and data, so 4kB is
694 not really precise. */
695 4, /* size of l2 cache */
696 0, /* size of prefetch block */
697 0, /* number of parallel prefetches */
698 1, /* Branch cost */
699 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
700 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
701 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
702 COSTS_N_INSNS (3), /* cost of FABS instruction. */
703 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
704 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
705 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
706 DUMMY_STRINGOP_ALGS},
707 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
708 DUMMY_STRINGOP_ALGS},
709 1, /* scalar_stmt_cost. */
710 1, /* scalar load_cost. */
711 1, /* scalar_store_cost. */
712 1, /* vec_stmt_cost. */
713 1, /* vec_to_scalar_cost. */
714 1, /* scalar_to_vec_cost. */
715 1, /* vec_align_load_cost. */
716 2, /* vec_unalign_load_cost. */
717 1, /* vec_store_cost. */
718 3, /* cond_taken_branch_cost. */
719 1, /* cond_not_taken_branch_cost. */
722 static const
723 struct processor_costs pentium_cost = {
724 COSTS_N_INSNS (1), /* cost of an add instruction */
725 COSTS_N_INSNS (1), /* cost of a lea instruction */
726 COSTS_N_INSNS (4), /* variable shift costs */
727 COSTS_N_INSNS (1), /* constant shift costs */
728 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
729 COSTS_N_INSNS (11), /* HI */
730 COSTS_N_INSNS (11), /* SI */
731 COSTS_N_INSNS (11), /* DI */
732 COSTS_N_INSNS (11)}, /* other */
733 0, /* cost of multiply per each bit set */
734 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
735 COSTS_N_INSNS (25), /* HI */
736 COSTS_N_INSNS (25), /* SI */
737 COSTS_N_INSNS (25), /* DI */
738 COSTS_N_INSNS (25)}, /* other */
739 COSTS_N_INSNS (3), /* cost of movsx */
740 COSTS_N_INSNS (2), /* cost of movzx */
741 8, /* "large" insn */
742 6, /* MOVE_RATIO */
743 6, /* cost for loading QImode using movzbl */
744 {2, 4, 2}, /* cost of loading integer registers
745 in QImode, HImode and SImode.
746 Relative to reg-reg move (2). */
747 {2, 4, 2}, /* cost of storing integer registers */
748 2, /* cost of reg,reg fld/fst */
749 {2, 2, 6}, /* cost of loading fp registers
750 in SFmode, DFmode and XFmode */
751 {4, 4, 6}, /* cost of storing fp registers
752 in SFmode, DFmode and XFmode */
753 8, /* cost of moving MMX register */
754 {8, 8}, /* cost of loading MMX registers
755 in SImode and DImode */
756 {8, 8}, /* cost of storing MMX registers
757 in SImode and DImode */
758 2, /* cost of moving SSE register */
759 {4, 8, 16}, /* cost of loading SSE registers
760 in SImode, DImode and TImode */
761 {4, 8, 16}, /* cost of storing SSE registers
762 in SImode, DImode and TImode */
763 3, /* MMX or SSE register to integer */
764 8, /* size of l1 cache. */
765 8, /* size of l2 cache */
766 0, /* size of prefetch block */
767 0, /* number of parallel prefetches */
768 2, /* Branch cost */
769 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
770 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
771 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
772 COSTS_N_INSNS (1), /* cost of FABS instruction. */
773 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
774 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
775 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
776 DUMMY_STRINGOP_ALGS},
777 {{libcall, {{-1, rep_prefix_4_byte}}},
778 DUMMY_STRINGOP_ALGS},
779 1, /* scalar_stmt_cost. */
780 1, /* scalar load_cost. */
781 1, /* scalar_store_cost. */
782 1, /* vec_stmt_cost. */
783 1, /* vec_to_scalar_cost. */
784 1, /* scalar_to_vec_cost. */
785 1, /* vec_align_load_cost. */
786 2, /* vec_unalign_load_cost. */
787 1, /* vec_store_cost. */
788 3, /* cond_taken_branch_cost. */
789 1, /* cond_not_taken_branch_cost. */
792 static const
793 struct processor_costs pentiumpro_cost = {
794 COSTS_N_INSNS (1), /* cost of an add instruction */
795 COSTS_N_INSNS (1), /* cost of a lea instruction */
796 COSTS_N_INSNS (1), /* variable shift costs */
797 COSTS_N_INSNS (1), /* constant shift costs */
798 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
799 COSTS_N_INSNS (4), /* HI */
800 COSTS_N_INSNS (4), /* SI */
801 COSTS_N_INSNS (4), /* DI */
802 COSTS_N_INSNS (4)}, /* other */
803 0, /* cost of multiply per each bit set */
804 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
805 COSTS_N_INSNS (17), /* HI */
806 COSTS_N_INSNS (17), /* SI */
807 COSTS_N_INSNS (17), /* DI */
808 COSTS_N_INSNS (17)}, /* other */
809 COSTS_N_INSNS (1), /* cost of movsx */
810 COSTS_N_INSNS (1), /* cost of movzx */
811 8, /* "large" insn */
812 6, /* MOVE_RATIO */
813 2, /* cost for loading QImode using movzbl */
814 {4, 4, 4}, /* cost of loading integer registers
815 in QImode, HImode and SImode.
816 Relative to reg-reg move (2). */
817 {2, 2, 2}, /* cost of storing integer registers */
818 2, /* cost of reg,reg fld/fst */
819 {2, 2, 6}, /* cost of loading fp registers
820 in SFmode, DFmode and XFmode */
821 {4, 4, 6}, /* cost of storing fp registers
822 in SFmode, DFmode and XFmode */
823 2, /* cost of moving MMX register */
824 {2, 2}, /* cost of loading MMX registers
825 in SImode and DImode */
826 {2, 2}, /* cost of storing MMX registers
827 in SImode and DImode */
828 2, /* cost of moving SSE register */
829 {2, 2, 8}, /* cost of loading SSE registers
830 in SImode, DImode and TImode */
831 {2, 2, 8}, /* cost of storing SSE registers
832 in SImode, DImode and TImode */
833 3, /* MMX or SSE register to integer */
834 8, /* size of l1 cache. */
835 256, /* size of l2 cache */
836 32, /* size of prefetch block */
837 6, /* number of parallel prefetches */
838 2, /* Branch cost */
839 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
840 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
841 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
842 COSTS_N_INSNS (2), /* cost of FABS instruction. */
843 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
844 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
845 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
846 (we ensure the alignment). For small blocks inline loop is still a
847 noticeable win, for bigger blocks either rep movsl or rep movsb is
848 way to go. Rep movsb has apparently more expensive startup time in CPU,
849 but after 4K the difference is down in the noise. */
850 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
851 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
852 DUMMY_STRINGOP_ALGS},
853 {{rep_prefix_4_byte, {{1024, unrolled_loop},
854 {8192, rep_prefix_4_byte}, {-1, libcall}}},
855 DUMMY_STRINGOP_ALGS},
856 1, /* scalar_stmt_cost. */
857 1, /* scalar load_cost. */
858 1, /* scalar_store_cost. */
859 1, /* vec_stmt_cost. */
860 1, /* vec_to_scalar_cost. */
861 1, /* scalar_to_vec_cost. */
862 1, /* vec_align_load_cost. */
863 2, /* vec_unalign_load_cost. */
864 1, /* vec_store_cost. */
865 3, /* cond_taken_branch_cost. */
866 1, /* cond_not_taken_branch_cost. */
869 static const
870 struct processor_costs geode_cost = {
871 COSTS_N_INSNS (1), /* cost of an add instruction */
872 COSTS_N_INSNS (1), /* cost of a lea instruction */
873 COSTS_N_INSNS (2), /* variable shift costs */
874 COSTS_N_INSNS (1), /* constant shift costs */
875 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
876 COSTS_N_INSNS (4), /* HI */
877 COSTS_N_INSNS (7), /* SI */
878 COSTS_N_INSNS (7), /* DI */
879 COSTS_N_INSNS (7)}, /* other */
880 0, /* cost of multiply per each bit set */
881 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
882 COSTS_N_INSNS (23), /* HI */
883 COSTS_N_INSNS (39), /* SI */
884 COSTS_N_INSNS (39), /* DI */
885 COSTS_N_INSNS (39)}, /* other */
886 COSTS_N_INSNS (1), /* cost of movsx */
887 COSTS_N_INSNS (1), /* cost of movzx */
888 8, /* "large" insn */
889 4, /* MOVE_RATIO */
890 1, /* cost for loading QImode using movzbl */
891 {1, 1, 1}, /* cost of loading integer registers
892 in QImode, HImode and SImode.
893 Relative to reg-reg move (2). */
894 {1, 1, 1}, /* cost of storing integer registers */
895 1, /* cost of reg,reg fld/fst */
896 {1, 1, 1}, /* cost of loading fp registers
897 in SFmode, DFmode and XFmode */
898 {4, 6, 6}, /* cost of storing fp registers
899 in SFmode, DFmode and XFmode */
901 1, /* cost of moving MMX register */
902 {1, 1}, /* cost of loading MMX registers
903 in SImode and DImode */
904 {1, 1}, /* cost of storing MMX registers
905 in SImode and DImode */
906 1, /* cost of moving SSE register */
907 {1, 1, 1}, /* cost of loading SSE registers
908 in SImode, DImode and TImode */
909 {1, 1, 1}, /* cost of storing SSE registers
910 in SImode, DImode and TImode */
911 1, /* MMX or SSE register to integer */
912 64, /* size of l1 cache. */
913 128, /* size of l2 cache. */
914 32, /* size of prefetch block */
915 1, /* number of parallel prefetches */
916 1, /* Branch cost */
917 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
918 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
919 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
920 COSTS_N_INSNS (1), /* cost of FABS instruction. */
921 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
922 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
923 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
924 DUMMY_STRINGOP_ALGS},
925 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
926 DUMMY_STRINGOP_ALGS},
927 1, /* scalar_stmt_cost. */
928 1, /* scalar load_cost. */
929 1, /* scalar_store_cost. */
930 1, /* vec_stmt_cost. */
931 1, /* vec_to_scalar_cost. */
932 1, /* scalar_to_vec_cost. */
933 1, /* vec_align_load_cost. */
934 2, /* vec_unalign_load_cost. */
935 1, /* vec_store_cost. */
936 3, /* cond_taken_branch_cost. */
937 1, /* cond_not_taken_branch_cost. */
940 static const
941 struct processor_costs k6_cost = {
942 COSTS_N_INSNS (1), /* cost of an add instruction */
943 COSTS_N_INSNS (2), /* cost of a lea instruction */
944 COSTS_N_INSNS (1), /* variable shift costs */
945 COSTS_N_INSNS (1), /* constant shift costs */
946 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
947 COSTS_N_INSNS (3), /* HI */
948 COSTS_N_INSNS (3), /* SI */
949 COSTS_N_INSNS (3), /* DI */
950 COSTS_N_INSNS (3)}, /* other */
951 0, /* cost of multiply per each bit set */
952 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
953 COSTS_N_INSNS (18), /* HI */
954 COSTS_N_INSNS (18), /* SI */
955 COSTS_N_INSNS (18), /* DI */
956 COSTS_N_INSNS (18)}, /* other */
957 COSTS_N_INSNS (2), /* cost of movsx */
958 COSTS_N_INSNS (2), /* cost of movzx */
959 8, /* "large" insn */
960 4, /* MOVE_RATIO */
961 3, /* cost for loading QImode using movzbl */
962 {4, 5, 4}, /* cost of loading integer registers
963 in QImode, HImode and SImode.
964 Relative to reg-reg move (2). */
965 {2, 3, 2}, /* cost of storing integer registers */
966 4, /* cost of reg,reg fld/fst */
967 {6, 6, 6}, /* cost of loading fp registers
968 in SFmode, DFmode and XFmode */
969 {4, 4, 4}, /* cost of storing fp registers
970 in SFmode, DFmode and XFmode */
971 2, /* cost of moving MMX register */
972 {2, 2}, /* cost of loading MMX registers
973 in SImode and DImode */
974 {2, 2}, /* cost of storing MMX registers
975 in SImode and DImode */
976 2, /* cost of moving SSE register */
977 {2, 2, 8}, /* cost of loading SSE registers
978 in SImode, DImode and TImode */
979 {2, 2, 8}, /* cost of storing SSE registers
980 in SImode, DImode and TImode */
981 6, /* MMX or SSE register to integer */
982 32, /* size of l1 cache. */
983 32, /* size of l2 cache. Some models
984 have integrated l2 cache, but
985 optimizing for k6 is not important
986 enough to worry about that. */
987 32, /* size of prefetch block */
988 1, /* number of parallel prefetches */
989 1, /* Branch cost */
990 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
991 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
992 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
993 COSTS_N_INSNS (2), /* cost of FABS instruction. */
994 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
995 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
996 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
997 DUMMY_STRINGOP_ALGS},
998 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
999 DUMMY_STRINGOP_ALGS},
1000 1, /* scalar_stmt_cost. */
1001 1, /* scalar load_cost. */
1002 1, /* scalar_store_cost. */
1003 1, /* vec_stmt_cost. */
1004 1, /* vec_to_scalar_cost. */
1005 1, /* scalar_to_vec_cost. */
1006 1, /* vec_align_load_cost. */
1007 2, /* vec_unalign_load_cost. */
1008 1, /* vec_store_cost. */
1009 3, /* cond_taken_branch_cost. */
1010 1, /* cond_not_taken_branch_cost. */
1013 static const
1014 struct processor_costs athlon_cost = {
1015 COSTS_N_INSNS (1), /* cost of an add instruction */
1016 COSTS_N_INSNS (2), /* cost of a lea instruction */
1017 COSTS_N_INSNS (1), /* variable shift costs */
1018 COSTS_N_INSNS (1), /* constant shift costs */
1019 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
1020 COSTS_N_INSNS (5), /* HI */
1021 COSTS_N_INSNS (5), /* SI */
1022 COSTS_N_INSNS (5), /* DI */
1023 COSTS_N_INSNS (5)}, /* other */
1024 0, /* cost of multiply per each bit set */
1025 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1026 COSTS_N_INSNS (26), /* HI */
1027 COSTS_N_INSNS (42), /* SI */
1028 COSTS_N_INSNS (74), /* DI */
1029 COSTS_N_INSNS (74)}, /* other */
1030 COSTS_N_INSNS (1), /* cost of movsx */
1031 COSTS_N_INSNS (1), /* cost of movzx */
1032 8, /* "large" insn */
1033 9, /* MOVE_RATIO */
1034 4, /* cost for loading QImode using movzbl */
1035 {3, 4, 3}, /* cost of loading integer registers
1036 in QImode, HImode and SImode.
1037 Relative to reg-reg move (2). */
1038 {3, 4, 3}, /* cost of storing integer registers */
1039 4, /* cost of reg,reg fld/fst */
1040 {4, 4, 12}, /* cost of loading fp registers
1041 in SFmode, DFmode and XFmode */
1042 {6, 6, 8}, /* cost of storing fp registers
1043 in SFmode, DFmode and XFmode */
1044 2, /* cost of moving MMX register */
1045 {4, 4}, /* cost of loading MMX registers
1046 in SImode and DImode */
1047 {4, 4}, /* cost of storing MMX registers
1048 in SImode and DImode */
1049 2, /* cost of moving SSE register */
1050 {4, 4, 6}, /* cost of loading SSE registers
1051 in SImode, DImode and TImode */
1052 {4, 4, 5}, /* cost of storing SSE registers
1053 in SImode, DImode and TImode */
1054 5, /* MMX or SSE register to integer */
1055 64, /* size of l1 cache. */
1056 256, /* size of l2 cache. */
1057 64, /* size of prefetch block */
1058 6, /* number of parallel prefetches */
1059 5, /* Branch cost */
1060 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1061 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1062 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
1063 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1064 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1065 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1066 /* For some reason, Athlon deals better with REP prefix (relative to loops)
1067 compared to K8. Alignment becomes important after 8 bytes for memcpy and
1068 128 bytes for memset. */
1069 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1070 DUMMY_STRINGOP_ALGS},
1071 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1072 DUMMY_STRINGOP_ALGS},
1073 1, /* scalar_stmt_cost. */
1074 1, /* scalar load_cost. */
1075 1, /* scalar_store_cost. */
1076 1, /* vec_stmt_cost. */
1077 1, /* vec_to_scalar_cost. */
1078 1, /* scalar_to_vec_cost. */
1079 1, /* vec_align_load_cost. */
1080 2, /* vec_unalign_load_cost. */
1081 1, /* vec_store_cost. */
1082 3, /* cond_taken_branch_cost. */
1083 1, /* cond_not_taken_branch_cost. */
1086 static const
1087 struct processor_costs k8_cost = {
1088 COSTS_N_INSNS (1), /* cost of an add instruction */
1089 COSTS_N_INSNS (2), /* cost of a lea instruction */
1090 COSTS_N_INSNS (1), /* variable shift costs */
1091 COSTS_N_INSNS (1), /* constant shift costs */
1092 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1093 COSTS_N_INSNS (4), /* HI */
1094 COSTS_N_INSNS (3), /* SI */
1095 COSTS_N_INSNS (4), /* DI */
1096 COSTS_N_INSNS (5)}, /* other */
1097 0, /* cost of multiply per each bit set */
1098 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1099 COSTS_N_INSNS (26), /* HI */
1100 COSTS_N_INSNS (42), /* SI */
1101 COSTS_N_INSNS (74), /* DI */
1102 COSTS_N_INSNS (74)}, /* other */
1103 COSTS_N_INSNS (1), /* cost of movsx */
1104 COSTS_N_INSNS (1), /* cost of movzx */
1105 8, /* "large" insn */
1106 9, /* MOVE_RATIO */
1107 4, /* cost for loading QImode using movzbl */
1108 {3, 4, 3}, /* cost of loading integer registers
1109 in QImode, HImode and SImode.
1110 Relative to reg-reg move (2). */
1111 {3, 4, 3}, /* cost of storing integer registers */
1112 4, /* cost of reg,reg fld/fst */
1113 {4, 4, 12}, /* cost of loading fp registers
1114 in SFmode, DFmode and XFmode */
1115 {6, 6, 8}, /* cost of storing fp registers
1116 in SFmode, DFmode and XFmode */
1117 2, /* cost of moving MMX register */
1118 {3, 3}, /* cost of loading MMX registers
1119 in SImode and DImode */
1120 {4, 4}, /* cost of storing MMX registers
1121 in SImode and DImode */
1122 2, /* cost of moving SSE register */
1123 {4, 3, 6}, /* cost of loading SSE registers
1124 in SImode, DImode and TImode */
1125 {4, 4, 5}, /* cost of storing SSE registers
1126 in SImode, DImode and TImode */
1127 5, /* MMX or SSE register to integer */
1128 64, /* size of l1 cache. */
1129 512, /* size of l2 cache. */
1130 64, /* size of prefetch block */
1131 /* New AMD processors never drop prefetches; if they cannot be performed
1132 immediately, they are queued. We set number of simultaneous prefetches
1133 to a large constant to reflect this (it probably is not a good idea not
1134 to limit number of prefetches at all, as their execution also takes some
1135 time). */
1136 100, /* number of parallel prefetches */
1137 3, /* Branch cost */
1138 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1139 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1140 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1141 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1142 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1143 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1144 /* K8 has optimized REP instruction for medium sized blocks, but for very
1145 small blocks it is better to use loop. For large blocks, libcall can
1146 do nontemporary accesses and beat inline considerably. */
1147 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1148 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1149 {{libcall, {{8, loop}, {24, unrolled_loop},
1150 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1151 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1152 4, /* scalar_stmt_cost. */
1153 2, /* scalar load_cost. */
1154 2, /* scalar_store_cost. */
1155 5, /* vec_stmt_cost. */
1156 0, /* vec_to_scalar_cost. */
1157 2, /* scalar_to_vec_cost. */
1158 2, /* vec_align_load_cost. */
1159 3, /* vec_unalign_load_cost. */
1160 3, /* vec_store_cost. */
1161 3, /* cond_taken_branch_cost. */
1162 2, /* cond_not_taken_branch_cost. */
1165 struct processor_costs amdfam10_cost = {
1166 COSTS_N_INSNS (1), /* cost of an add instruction */
1167 COSTS_N_INSNS (2), /* cost of a lea instruction */
1168 COSTS_N_INSNS (1), /* variable shift costs */
1169 COSTS_N_INSNS (1), /* constant shift costs */
1170 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1171 COSTS_N_INSNS (4), /* HI */
1172 COSTS_N_INSNS (3), /* SI */
1173 COSTS_N_INSNS (4), /* DI */
1174 COSTS_N_INSNS (5)}, /* other */
1175 0, /* cost of multiply per each bit set */
1176 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1177 COSTS_N_INSNS (35), /* HI */
1178 COSTS_N_INSNS (51), /* SI */
1179 COSTS_N_INSNS (83), /* DI */
1180 COSTS_N_INSNS (83)}, /* other */
1181 COSTS_N_INSNS (1), /* cost of movsx */
1182 COSTS_N_INSNS (1), /* cost of movzx */
1183 8, /* "large" insn */
1184 9, /* MOVE_RATIO */
1185 4, /* cost for loading QImode using movzbl */
1186 {3, 4, 3}, /* cost of loading integer registers
1187 in QImode, HImode and SImode.
1188 Relative to reg-reg move (2). */
1189 {3, 4, 3}, /* cost of storing integer registers */
1190 4, /* cost of reg,reg fld/fst */
1191 {4, 4, 12}, /* cost of loading fp registers
1192 in SFmode, DFmode and XFmode */
1193 {6, 6, 8}, /* cost of storing fp registers
1194 in SFmode, DFmode and XFmode */
1195 2, /* cost of moving MMX register */
1196 {3, 3}, /* cost of loading MMX registers
1197 in SImode and DImode */
1198 {4, 4}, /* cost of storing MMX registers
1199 in SImode and DImode */
1200 2, /* cost of moving SSE register */
1201 {4, 4, 3}, /* cost of loading SSE registers
1202 in SImode, DImode and TImode */
1203 {4, 4, 5}, /* cost of storing SSE registers
1204 in SImode, DImode and TImode */
1205 3, /* MMX or SSE register to integer */
1206 /* On K8:
1207 MOVD reg64, xmmreg Double FSTORE 4
1208 MOVD reg32, xmmreg Double FSTORE 4
1209 On AMDFAM10:
1210 MOVD reg64, xmmreg Double FADD 3
1211 1/1 1/1
1212 MOVD reg32, xmmreg Double FADD 3
1213 1/1 1/1 */
1214 64, /* size of l1 cache. */
1215 512, /* size of l2 cache. */
1216 64, /* size of prefetch block */
1217 /* New AMD processors never drop prefetches; if they cannot be performed
1218 immediately, they are queued. We set number of simultaneous prefetches
1219 to a large constant to reflect this (it probably is not a good idea not
1220 to limit number of prefetches at all, as their execution also takes some
1221 time). */
1222 100, /* number of parallel prefetches */
1223 2, /* Branch cost */
1224 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1225 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1226 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1227 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1228 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1229 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1231 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
1232 very small blocks it is better to use loop. For large blocks, libcall can
1233 do nontemporary accesses and beat inline considerably. */
1234 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1235 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1236 {{libcall, {{8, loop}, {24, unrolled_loop},
1237 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1238 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1239 4, /* scalar_stmt_cost. */
1240 2, /* scalar load_cost. */
1241 2, /* scalar_store_cost. */
1242 6, /* vec_stmt_cost. */
1243 0, /* vec_to_scalar_cost. */
1244 2, /* scalar_to_vec_cost. */
1245 2, /* vec_align_load_cost. */
1246 2, /* vec_unalign_load_cost. */
1247 2, /* vec_store_cost. */
1248 2, /* cond_taken_branch_cost. */
1249 1, /* cond_not_taken_branch_cost. */
1252 struct processor_costs bdver1_cost = {
1253 COSTS_N_INSNS (1), /* cost of an add instruction */
1254 COSTS_N_INSNS (1), /* cost of a lea instruction */
1255 COSTS_N_INSNS (1), /* variable shift costs */
1256 COSTS_N_INSNS (1), /* constant shift costs */
1257 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1258 COSTS_N_INSNS (4), /* HI */
1259 COSTS_N_INSNS (4), /* SI */
1260 COSTS_N_INSNS (6), /* DI */
1261 COSTS_N_INSNS (6)}, /* other */
1262 0, /* cost of multiply per each bit set */
1263 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1264 COSTS_N_INSNS (35), /* HI */
1265 COSTS_N_INSNS (51), /* SI */
1266 COSTS_N_INSNS (83), /* DI */
1267 COSTS_N_INSNS (83)}, /* other */
1268 COSTS_N_INSNS (1), /* cost of movsx */
1269 COSTS_N_INSNS (1), /* cost of movzx */
1270 8, /* "large" insn */
1271 9, /* MOVE_RATIO */
1272 4, /* cost for loading QImode using movzbl */
1273 {5, 5, 4}, /* cost of loading integer registers
1274 in QImode, HImode and SImode.
1275 Relative to reg-reg move (2). */
1276 {4, 4, 4}, /* cost of storing integer registers */
1277 2, /* cost of reg,reg fld/fst */
1278 {5, 5, 12}, /* cost of loading fp registers
1279 in SFmode, DFmode and XFmode */
1280 {4, 4, 8}, /* cost of storing fp registers
1281 in SFmode, DFmode and XFmode */
1282 2, /* cost of moving MMX register */
1283 {4, 4}, /* cost of loading MMX registers
1284 in SImode and DImode */
1285 {4, 4}, /* cost of storing MMX registers
1286 in SImode and DImode */
1287 2, /* cost of moving SSE register */
1288 {4, 4, 4}, /* cost of loading SSE registers
1289 in SImode, DImode and TImode */
1290 {4, 4, 4}, /* cost of storing SSE registers
1291 in SImode, DImode and TImode */
1292 2, /* MMX or SSE register to integer */
1293 /* On K8:
1294 MOVD reg64, xmmreg Double FSTORE 4
1295 MOVD reg32, xmmreg Double FSTORE 4
1296 On AMDFAM10:
1297 MOVD reg64, xmmreg Double FADD 3
1298 1/1 1/1
1299 MOVD reg32, xmmreg Double FADD 3
1300 1/1 1/1 */
1301 16, /* size of l1 cache. */
1302 2048, /* size of l2 cache. */
1303 64, /* size of prefetch block */
1304 /* New AMD processors never drop prefetches; if they cannot be performed
1305 immediately, they are queued. We set number of simultaneous prefetches
1306 to a large constant to reflect this (it probably is not a good idea not
1307 to limit number of prefetches at all, as their execution also takes some
1308 time). */
1309 100, /* number of parallel prefetches */
1310 2, /* Branch cost */
1311 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1312 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1313 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1314 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1315 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1316 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1318 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
1319 very small blocks it is better to use loop. For large blocks, libcall
1320 can do nontemporary accesses and beat inline considerably. */
1321 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1322 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1323 {{libcall, {{8, loop}, {24, unrolled_loop},
1324 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1325 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1326 6, /* scalar_stmt_cost. */
1327 4, /* scalar load_cost. */
1328 4, /* scalar_store_cost. */
1329 6, /* vec_stmt_cost. */
1330 0, /* vec_to_scalar_cost. */
1331 2, /* scalar_to_vec_cost. */
1332 4, /* vec_align_load_cost. */
1333 4, /* vec_unalign_load_cost. */
1334 4, /* vec_store_cost. */
1335 2, /* cond_taken_branch_cost. */
1336 1, /* cond_not_taken_branch_cost. */
1339 struct processor_costs btver1_cost = {
1340 COSTS_N_INSNS (1), /* cost of an add instruction */
1341 COSTS_N_INSNS (2), /* cost of a lea instruction */
1342 COSTS_N_INSNS (1), /* variable shift costs */
1343 COSTS_N_INSNS (1), /* constant shift costs */
1344 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1345 COSTS_N_INSNS (4), /* HI */
1346 COSTS_N_INSNS (3), /* SI */
1347 COSTS_N_INSNS (4), /* DI */
1348 COSTS_N_INSNS (5)}, /* other */
1349 0, /* cost of multiply per each bit set */
1350 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1351 COSTS_N_INSNS (35), /* HI */
1352 COSTS_N_INSNS (51), /* SI */
1353 COSTS_N_INSNS (83), /* DI */
1354 COSTS_N_INSNS (83)}, /* other */
1355 COSTS_N_INSNS (1), /* cost of movsx */
1356 COSTS_N_INSNS (1), /* cost of movzx */
1357 8, /* "large" insn */
1358 9, /* MOVE_RATIO */
1359 4, /* cost for loading QImode using movzbl */
1360 {3, 4, 3}, /* cost of loading integer registers
1361 in QImode, HImode and SImode.
1362 Relative to reg-reg move (2). */
1363 {3, 4, 3}, /* cost of storing integer registers */
1364 4, /* cost of reg,reg fld/fst */
1365 {4, 4, 12}, /* cost of loading fp registers
1366 in SFmode, DFmode and XFmode */
1367 {6, 6, 8}, /* cost of storing fp registers
1368 in SFmode, DFmode and XFmode */
1369 2, /* cost of moving MMX register */
1370 {3, 3}, /* cost of loading MMX registers
1371 in SImode and DImode */
1372 {4, 4}, /* cost of storing MMX registers
1373 in SImode and DImode */
1374 2, /* cost of moving SSE register */
1375 {4, 4, 3}, /* cost of loading SSE registers
1376 in SImode, DImode and TImode */
1377 {4, 4, 5}, /* cost of storing SSE registers
1378 in SImode, DImode and TImode */
1379 3, /* MMX or SSE register to integer */
1380 /* On K8:
1381 MOVD reg64, xmmreg Double FSTORE 4
1382 MOVD reg32, xmmreg Double FSTORE 4
1383 On AMDFAM10:
1384 MOVD reg64, xmmreg Double FADD 3
1385 1/1 1/1
1386 MOVD reg32, xmmreg Double FADD 3
1387 1/1 1/1 */
1388 32, /* size of l1 cache. */
1389 512, /* size of l2 cache. */
1390 64, /* size of prefetch block */
1391 100, /* number of parallel prefetches */
1392 2, /* Branch cost */
1393 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1394 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1395 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1396 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1397 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1398 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1400 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1401 very small blocks it is better to use loop. For large blocks, libcall can
1402 do nontemporary accesses and beat inline considerably. */
1403 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1404 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1405 {{libcall, {{8, loop}, {24, unrolled_loop},
1406 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1407 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1408 4, /* scalar_stmt_cost. */
1409 2, /* scalar load_cost. */
1410 2, /* scalar_store_cost. */
1411 6, /* vec_stmt_cost. */
1412 0, /* vec_to_scalar_cost. */
1413 2, /* scalar_to_vec_cost. */
1414 2, /* vec_align_load_cost. */
1415 2, /* vec_unalign_load_cost. */
1416 2, /* vec_store_cost. */
1417 2, /* cond_taken_branch_cost. */
1418 1, /* cond_not_taken_branch_cost. */
1421 static const
1422 struct processor_costs pentium4_cost = {
1423 COSTS_N_INSNS (1), /* cost of an add instruction */
1424 COSTS_N_INSNS (3), /* cost of a lea instruction */
1425 COSTS_N_INSNS (4), /* variable shift costs */
1426 COSTS_N_INSNS (4), /* constant shift costs */
1427 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1428 COSTS_N_INSNS (15), /* HI */
1429 COSTS_N_INSNS (15), /* SI */
1430 COSTS_N_INSNS (15), /* DI */
1431 COSTS_N_INSNS (15)}, /* other */
1432 0, /* cost of multiply per each bit set */
1433 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1434 COSTS_N_INSNS (56), /* HI */
1435 COSTS_N_INSNS (56), /* SI */
1436 COSTS_N_INSNS (56), /* DI */
1437 COSTS_N_INSNS (56)}, /* other */
1438 COSTS_N_INSNS (1), /* cost of movsx */
1439 COSTS_N_INSNS (1), /* cost of movzx */
1440 16, /* "large" insn */
1441 6, /* MOVE_RATIO */
1442 2, /* cost for loading QImode using movzbl */
1443 {4, 5, 4}, /* cost of loading integer registers
1444 in QImode, HImode and SImode.
1445 Relative to reg-reg move (2). */
1446 {2, 3, 2}, /* cost of storing integer registers */
1447 2, /* cost of reg,reg fld/fst */
1448 {2, 2, 6}, /* cost of loading fp registers
1449 in SFmode, DFmode and XFmode */
1450 {4, 4, 6}, /* cost of storing fp registers
1451 in SFmode, DFmode and XFmode */
1452 2, /* cost of moving MMX register */
1453 {2, 2}, /* cost of loading MMX registers
1454 in SImode and DImode */
1455 {2, 2}, /* cost of storing MMX registers
1456 in SImode and DImode */
1457 12, /* cost of moving SSE register */
1458 {12, 12, 12}, /* cost of loading SSE registers
1459 in SImode, DImode and TImode */
1460 {2, 2, 8}, /* cost of storing SSE registers
1461 in SImode, DImode and TImode */
1462 10, /* MMX or SSE register to integer */
1463 8, /* size of l1 cache. */
1464 256, /* size of l2 cache. */
1465 64, /* size of prefetch block */
1466 6, /* number of parallel prefetches */
1467 2, /* Branch cost */
1468 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1469 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1470 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1471 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1472 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1473 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1474 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1475 DUMMY_STRINGOP_ALGS},
1476 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1477 {-1, libcall}}},
1478 DUMMY_STRINGOP_ALGS},
1479 1, /* scalar_stmt_cost. */
1480 1, /* scalar load_cost. */
1481 1, /* scalar_store_cost. */
1482 1, /* vec_stmt_cost. */
1483 1, /* vec_to_scalar_cost. */
1484 1, /* scalar_to_vec_cost. */
1485 1, /* vec_align_load_cost. */
1486 2, /* vec_unalign_load_cost. */
1487 1, /* vec_store_cost. */
1488 3, /* cond_taken_branch_cost. */
1489 1, /* cond_not_taken_branch_cost. */
1492 static const
1493 struct processor_costs nocona_cost = {
1494 COSTS_N_INSNS (1), /* cost of an add instruction */
1495 COSTS_N_INSNS (1), /* cost of a lea instruction */
1496 COSTS_N_INSNS (1), /* variable shift costs */
1497 COSTS_N_INSNS (1), /* constant shift costs */
1498 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1499 COSTS_N_INSNS (10), /* HI */
1500 COSTS_N_INSNS (10), /* SI */
1501 COSTS_N_INSNS (10), /* DI */
1502 COSTS_N_INSNS (10)}, /* other */
1503 0, /* cost of multiply per each bit set */
1504 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1505 COSTS_N_INSNS (66), /* HI */
1506 COSTS_N_INSNS (66), /* SI */
1507 COSTS_N_INSNS (66), /* DI */
1508 COSTS_N_INSNS (66)}, /* other */
1509 COSTS_N_INSNS (1), /* cost of movsx */
1510 COSTS_N_INSNS (1), /* cost of movzx */
1511 16, /* "large" insn */
1512 17, /* MOVE_RATIO */
1513 4, /* cost for loading QImode using movzbl */
1514 {4, 4, 4}, /* cost of loading integer registers
1515 in QImode, HImode and SImode.
1516 Relative to reg-reg move (2). */
1517 {4, 4, 4}, /* cost of storing integer registers */
1518 3, /* cost of reg,reg fld/fst */
1519 {12, 12, 12}, /* cost of loading fp registers
1520 in SFmode, DFmode and XFmode */
1521 {4, 4, 4}, /* cost of storing fp registers
1522 in SFmode, DFmode and XFmode */
1523 6, /* cost of moving MMX register */
1524 {12, 12}, /* cost of loading MMX registers
1525 in SImode and DImode */
1526 {12, 12}, /* cost of storing MMX registers
1527 in SImode and DImode */
1528 6, /* cost of moving SSE register */
1529 {12, 12, 12}, /* cost of loading SSE registers
1530 in SImode, DImode and TImode */
1531 {12, 12, 12}, /* cost of storing SSE registers
1532 in SImode, DImode and TImode */
1533 8, /* MMX or SSE register to integer */
1534 8, /* size of l1 cache. */
1535 1024, /* size of l2 cache. */
1536 128, /* size of prefetch block */
1537 8, /* number of parallel prefetches */
1538 1, /* Branch cost */
1539 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1540 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1541 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1542 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1543 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1544 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1545 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1546 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
1547 {100000, unrolled_loop}, {-1, libcall}}}},
1548 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1549 {-1, libcall}}},
1550 {libcall, {{24, loop}, {64, unrolled_loop},
1551 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1552 1, /* scalar_stmt_cost. */
1553 1, /* scalar load_cost. */
1554 1, /* scalar_store_cost. */
1555 1, /* vec_stmt_cost. */
1556 1, /* vec_to_scalar_cost. */
1557 1, /* scalar_to_vec_cost. */
1558 1, /* vec_align_load_cost. */
1559 2, /* vec_unalign_load_cost. */
1560 1, /* vec_store_cost. */
1561 3, /* cond_taken_branch_cost. */
1562 1, /* cond_not_taken_branch_cost. */
1565 static const
1566 struct processor_costs atom_cost = {
1567 COSTS_N_INSNS (1), /* cost of an add instruction */
1568 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1569 COSTS_N_INSNS (1), /* variable shift costs */
1570 COSTS_N_INSNS (1), /* constant shift costs */
1571 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1572 COSTS_N_INSNS (4), /* HI */
1573 COSTS_N_INSNS (3), /* SI */
1574 COSTS_N_INSNS (4), /* DI */
1575 COSTS_N_INSNS (2)}, /* other */
1576 0, /* cost of multiply per each bit set */
1577 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1578 COSTS_N_INSNS (26), /* HI */
1579 COSTS_N_INSNS (42), /* SI */
1580 COSTS_N_INSNS (74), /* DI */
1581 COSTS_N_INSNS (74)}, /* other */
1582 COSTS_N_INSNS (1), /* cost of movsx */
1583 COSTS_N_INSNS (1), /* cost of movzx */
1584 8, /* "large" insn */
1585 17, /* MOVE_RATIO */
1586 2, /* cost for loading QImode using movzbl */
1587 {4, 4, 4}, /* cost of loading integer registers
1588 in QImode, HImode and SImode.
1589 Relative to reg-reg move (2). */
1590 {4, 4, 4}, /* cost of storing integer registers */
1591 4, /* cost of reg,reg fld/fst */
1592 {12, 12, 12}, /* cost of loading fp registers
1593 in SFmode, DFmode and XFmode */
1594 {6, 6, 8}, /* cost of storing fp registers
1595 in SFmode, DFmode and XFmode */
1596 2, /* cost of moving MMX register */
1597 {8, 8}, /* cost of loading MMX registers
1598 in SImode and DImode */
1599 {8, 8}, /* cost of storing MMX registers
1600 in SImode and DImode */
1601 2, /* cost of moving SSE register */
1602 {8, 8, 8}, /* cost of loading SSE registers
1603 in SImode, DImode and TImode */
1604 {8, 8, 8}, /* cost of storing SSE registers
1605 in SImode, DImode and TImode */
1606 5, /* MMX or SSE register to integer */
1607 32, /* size of l1 cache. */
1608 256, /* size of l2 cache. */
1609 64, /* size of prefetch block */
1610 6, /* number of parallel prefetches */
1611 3, /* Branch cost */
1612 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1613 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1614 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1615 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1616 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1617 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1618 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1619 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1620 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1621 {{libcall, {{8, loop}, {15, unrolled_loop},
1622 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1623 {libcall, {{24, loop}, {32, unrolled_loop},
1624 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1625 1, /* scalar_stmt_cost. */
1626 1, /* scalar load_cost. */
1627 1, /* scalar_store_cost. */
1628 1, /* vec_stmt_cost. */
1629 1, /* vec_to_scalar_cost. */
1630 1, /* scalar_to_vec_cost. */
1631 1, /* vec_align_load_cost. */
1632 2, /* vec_unalign_load_cost. */
1633 1, /* vec_store_cost. */
1634 3, /* cond_taken_branch_cost. */
1635 1, /* cond_not_taken_branch_cost. */
1638 /* Generic64 should produce code tuned for Nocona and K8. */
1639 static const
1640 struct processor_costs generic64_cost = {
1641 COSTS_N_INSNS (1), /* cost of an add instruction */
1642 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1643 that cost, however, our current implementation of synth_mult results in
1644 the use of unnecessary temporary registers, causing regressions on several
1645 SPECfp benchmarks. */
1646 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1647 COSTS_N_INSNS (1), /* variable shift costs */
1648 COSTS_N_INSNS (1), /* constant shift costs */
1649 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1650 COSTS_N_INSNS (4), /* HI */
1651 COSTS_N_INSNS (3), /* SI */
1652 COSTS_N_INSNS (4), /* DI */
1653 COSTS_N_INSNS (2)}, /* other */
1654 0, /* cost of multiply per each bit set */
1655 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1656 COSTS_N_INSNS (26), /* HI */
1657 COSTS_N_INSNS (42), /* SI */
1658 COSTS_N_INSNS (74), /* DI */
1659 COSTS_N_INSNS (74)}, /* other */
1660 COSTS_N_INSNS (1), /* cost of movsx */
1661 COSTS_N_INSNS (1), /* cost of movzx */
1662 8, /* "large" insn */
1663 17, /* MOVE_RATIO */
1664 4, /* cost for loading QImode using movzbl */
1665 {4, 4, 4}, /* cost of loading integer registers
1666 in QImode, HImode and SImode.
1667 Relative to reg-reg move (2). */
1668 {4, 4, 4}, /* cost of storing integer registers */
1669 4, /* cost of reg,reg fld/fst */
1670 {12, 12, 12}, /* cost of loading fp registers
1671 in SFmode, DFmode and XFmode */
1672 {6, 6, 8}, /* cost of storing fp registers
1673 in SFmode, DFmode and XFmode */
1674 2, /* cost of moving MMX register */
1675 {8, 8}, /* cost of loading MMX registers
1676 in SImode and DImode */
1677 {8, 8}, /* cost of storing MMX registers
1678 in SImode and DImode */
1679 2, /* cost of moving SSE register */
1680 {8, 8, 8}, /* cost of loading SSE registers
1681 in SImode, DImode and TImode */
1682 {8, 8, 8}, /* cost of storing SSE registers
1683 in SImode, DImode and TImode */
1684 5, /* MMX or SSE register to integer */
1685 32, /* size of l1 cache. */
1686 512, /* size of l2 cache. */
1687 64, /* size of prefetch block */
1688 6, /* number of parallel prefetches */
1689 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1690 value is increased to the perhaps more appropriate value of 5. */
1691 3, /* Branch cost */
1692 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1693 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1694 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1695 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1696 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1697 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1698 {DUMMY_STRINGOP_ALGS,
1699 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1700 {DUMMY_STRINGOP_ALGS,
1701 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1702 1, /* scalar_stmt_cost. */
1703 1, /* scalar load_cost. */
1704 1, /* scalar_store_cost. */
1705 1, /* vec_stmt_cost. */
1706 1, /* vec_to_scalar_cost. */
1707 1, /* scalar_to_vec_cost. */
1708 1, /* vec_align_load_cost. */
1709 2, /* vec_unalign_load_cost. */
1710 1, /* vec_store_cost. */
1711 3, /* cond_taken_branch_cost. */
1712 1, /* cond_not_taken_branch_cost. */
1715 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1716 Athlon and K8. */
1717 static const
1718 struct processor_costs generic32_cost = {
1719 COSTS_N_INSNS (1), /* cost of an add instruction */
1720 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1721 COSTS_N_INSNS (1), /* variable shift costs */
1722 COSTS_N_INSNS (1), /* constant shift costs */
1723 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1724 COSTS_N_INSNS (4), /* HI */
1725 COSTS_N_INSNS (3), /* SI */
1726 COSTS_N_INSNS (4), /* DI */
1727 COSTS_N_INSNS (2)}, /* other */
1728 0, /* cost of multiply per each bit set */
1729 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1730 COSTS_N_INSNS (26), /* HI */
1731 COSTS_N_INSNS (42), /* SI */
1732 COSTS_N_INSNS (74), /* DI */
1733 COSTS_N_INSNS (74)}, /* other */
1734 COSTS_N_INSNS (1), /* cost of movsx */
1735 COSTS_N_INSNS (1), /* cost of movzx */
1736 8, /* "large" insn */
1737 17, /* MOVE_RATIO */
1738 4, /* cost for loading QImode using movzbl */
1739 {4, 4, 4}, /* cost of loading integer registers
1740 in QImode, HImode and SImode.
1741 Relative to reg-reg move (2). */
1742 {4, 4, 4}, /* cost of storing integer registers */
1743 4, /* cost of reg,reg fld/fst */
1744 {12, 12, 12}, /* cost of loading fp registers
1745 in SFmode, DFmode and XFmode */
1746 {6, 6, 8}, /* cost of storing fp registers
1747 in SFmode, DFmode and XFmode */
1748 2, /* cost of moving MMX register */
1749 {8, 8}, /* cost of loading MMX registers
1750 in SImode and DImode */
1751 {8, 8}, /* cost of storing MMX registers
1752 in SImode and DImode */
1753 2, /* cost of moving SSE register */
1754 {8, 8, 8}, /* cost of loading SSE registers
1755 in SImode, DImode and TImode */
1756 {8, 8, 8}, /* cost of storing SSE registers
1757 in SImode, DImode and TImode */
1758 5, /* MMX or SSE register to integer */
1759 32, /* size of l1 cache. */
1760 256, /* size of l2 cache. */
1761 64, /* size of prefetch block */
1762 6, /* number of parallel prefetches */
1763 3, /* Branch cost */
1764 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1765 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1766 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1767 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1768 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1769 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1770 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1771 DUMMY_STRINGOP_ALGS},
1772 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1773 DUMMY_STRINGOP_ALGS},
1774 1, /* scalar_stmt_cost. */
1775 1, /* scalar load_cost. */
1776 1, /* scalar_store_cost. */
1777 1, /* vec_stmt_cost. */
1778 1, /* vec_to_scalar_cost. */
1779 1, /* scalar_to_vec_cost. */
1780 1, /* vec_align_load_cost. */
1781 2, /* vec_unalign_load_cost. */
1782 1, /* vec_store_cost. */
1783 3, /* cond_taken_branch_cost. */
1784 1, /* cond_not_taken_branch_cost. */
1787 const struct processor_costs *ix86_cost = &pentium_cost;
1789 /* Processor feature/optimization bitmasks. */
1790 #define m_386 (1<<PROCESSOR_I386)
1791 #define m_486 (1<<PROCESSOR_I486)
1792 #define m_PENT (1<<PROCESSOR_PENTIUM)
1793 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1794 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1795 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1796 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1797 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1798 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1799 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1800 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1801 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1802 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1803 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1804 #define m_ATOM (1<<PROCESSOR_ATOM)
1806 #define m_GEODE (1<<PROCESSOR_GEODE)
1807 #define m_K6 (1<<PROCESSOR_K6)
1808 #define m_K6_GEODE (m_K6 | m_GEODE)
1809 #define m_K8 (1<<PROCESSOR_K8)
1810 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1811 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1812 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1813 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1814 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1815 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1 | m_BTVER1)
1817 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1818 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1820 /* Generic instruction choice should be common subset of supported CPUs
1821 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1822 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1824 /* Feature tests against the various tunings. */
1825 unsigned char ix86_tune_features[X86_TUNE_LAST];
1827 /* Feature tests against the various tunings used to create ix86_tune_features
1828 based on the processor mask. */
1829 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1830 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1831 negatively, so enabling it for Generic64 seems like a good code size
1832 tradeoff. We can't enable it for 32bit generic because it does not
1833 work well with PPro based chips. */
1834 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2I7_64 | m_GENERIC64,
1836 /* X86_TUNE_PUSH_MEMORY */
1837 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1838 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1840 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1841 m_486 | m_PENT,
1843 /* X86_TUNE_UNROLL_STRLEN */
1844 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1845 | m_CORE2I7 | m_GENERIC,
1847 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1848 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1849 | m_CORE2I7 | m_GENERIC,
1851 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1852 on simulation results. But after P4 was made, no performance benefit
1853 was observed with branch hints; they also increase the code size.
1854 As a result, icc never generates branch hints. */
1855 0,
1857 /* X86_TUNE_DOUBLE_WITH_ADD */
1858 ~m_386,
1860 /* X86_TUNE_USE_SAHF */
1861 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_BTVER1
1862 | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1864 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1865 partial dependencies. */
1866 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1867 | m_CORE2I7 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1869 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1870 register stalls on Generic32 compilation setting as well. However
1871 in current implementation the partial register stalls are not eliminated
1872 very well - they can be introduced via subregs synthesized by combine
1873 and can happen in caller/callee saving sequences. Because this option
1874 pays back little on PPro based chips and is in conflict with partial reg
1875 dependencies used by Athlon/P4 based chips, it is better to leave it off
1876 for generic32 for now. */
1877 m_PPRO,
1879 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1880 m_CORE2I7 | m_GENERIC,
1882 /* X86_TUNE_USE_HIMODE_FIOP */
1883 m_386 | m_486 | m_K6_GEODE,
1885 /* X86_TUNE_USE_SIMODE_FIOP */
1886 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2I7 | m_GENERIC),
1888 /* X86_TUNE_USE_MOV0 */
1889 m_K6,
1891 /* X86_TUNE_USE_CLTD */
1892 ~(m_PENT | m_ATOM | m_K6 | m_CORE2I7 | m_GENERIC),
1894 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1895 m_PENT4,
1897 /* X86_TUNE_SPLIT_LONG_MOVES */
1898 m_PPRO,
1900 /* X86_TUNE_READ_MODIFY_WRITE */
1901 ~m_PENT,
1903 /* X86_TUNE_READ_MODIFY */
1904 ~(m_PENT | m_PPRO),
1906 /* X86_TUNE_PROMOTE_QIMODE */
1907 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1908 | m_CORE2I7 | m_GENERIC /* | m_PENT4 ? */,
1910 /* X86_TUNE_FAST_PREFIX */
1911 ~(m_PENT | m_486 | m_386),
1913 /* X86_TUNE_SINGLE_STRINGOP */
1914 m_386 | m_PENT4 | m_NOCONA,
1916 /* X86_TUNE_QIMODE_MATH */
1919 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1920 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1921 might be considered for Generic32 if our scheme for avoiding partial
1922 stalls was more effective. */
1923 ~m_PPRO,
1925 /* X86_TUNE_PROMOTE_QI_REGS */
1928 /* X86_TUNE_PROMOTE_HI_REGS */
1929 m_PPRO,
1931 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1932 over esp addition. */
1933 m_386 | m_486 | m_PENT | m_PPRO,
1935 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1936 over esp addition. */
1937 m_PENT,
1939 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1940 over esp subtraction. */
1941 m_386 | m_486 | m_PENT | m_K6_GEODE,
1943 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1944 over esp subtraction. */
1945 m_PENT | m_K6_GEODE,
1947 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1948 for DFmode copies */
1949 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
1950 | m_GENERIC | m_GEODE),
1952 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1953 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1955 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1956 conflict here between PPro/Pentium4 based chips that treat 128bit
1957 SSE registers as single units and K8 based chips that divide SSE
1958 registers into two 64bit halves. This knob promotes all store destinations
1959 to be 128bit to allow register renaming on 128bit SSE units, but usually
1960 results in one extra microop on 64bit SSE units. Experimental results
1961 show that disabling this option on P4 brings over a 20% SPECfp regression,
1962 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1963 masked by careful scheduling of moves. */
1964 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7 | m_GENERIC
1965 | m_AMDFAM10 | m_BDVER1,
1967 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1968 m_AMDFAM10 | m_BDVER1 | m_BTVER1 | m_COREI7,
1970 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1971 m_BDVER1 | m_COREI7,
1973 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1974 m_BDVER1,
1976 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1977 are resolved on SSE register parts instead of whole registers, so we may
1978 maintain just the lower part of scalar values in the proper format, leaving
1979 the upper part undefined. */
1980 m_ATHLON_K8,
1982 /* X86_TUNE_SSE_TYPELESS_STORES */
1983 m_AMD_MULTIPLE,
1985 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1986 m_PPRO | m_PENT4 | m_NOCONA,
1988 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1989 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1991 /* X86_TUNE_PROLOGUE_USING_MOVE */
1992 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1994 /* X86_TUNE_EPILOGUE_USING_MOVE */
1995 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1997 /* X86_TUNE_SHIFT1 */
1998 ~m_486,
2000 /* X86_TUNE_USE_FFREEP */
2001 m_AMD_MULTIPLE,
2003 /* X86_TUNE_INTER_UNIT_MOVES */
2004 ~(m_AMD_MULTIPLE | m_GENERIC),
2006 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2007 ~(m_AMDFAM10 | m_BDVER1),
2009 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2010 than 4 branch instructions in the 16 byte window. */
2011 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2I7
2012 | m_GENERIC,
2014 /* X86_TUNE_SCHEDULE */
2015 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2I7
2016 | m_GENERIC,
2018 /* X86_TUNE_USE_BT */
2019 m_AMD_MULTIPLE | m_ATOM | m_CORE2I7 | m_GENERIC,
2021 /* X86_TUNE_USE_INCDEC */
2022 ~(m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC | m_ATOM),
2024 /* X86_TUNE_PAD_RETURNS */
2025 m_AMD_MULTIPLE | m_CORE2I7 | m_GENERIC,
2027 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function. */
2028 m_ATOM,
2030 /* X86_TUNE_EXT_80387_CONSTANTS */
2031 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
2032 | m_CORE2I7 | m_GENERIC,
2034 /* X86_TUNE_SHORTEN_X87_SSE */
2035 ~m_K8,
2037 /* X86_TUNE_AVOID_VECTOR_DECODE */
2038 m_K8 | m_CORE2I7_64 | m_GENERIC64,
2040 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
2041 and SImode multiply, but 386 and 486 do HImode multiply faster. */
2042 ~(m_386 | m_486),
2044 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2045 vector path on AMD machines. */
2046 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
2048 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2049 machines. */
2050 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
2052 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2053 than a MOV. */
2054 m_PENT,
2056 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2057 but one byte longer. */
2058 m_PENT,
2060 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2061 operand that cannot be represented using a modRM byte. The XOR
2062 replacement is long decoded, so this split helps here as well. */
2063 m_K6,
2065 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2066 from FP to FP. */
2067 m_AMDFAM10 | m_CORE2I7 | m_GENERIC,
2069 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2070 from integer to FP. */
2071 m_AMDFAM10,
2073 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2074 with a subsequent conditional jump instruction into a single
2075 compare-and-branch uop. */
2076 m_BDVER1,
2078 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2079 will impact LEA instruction selection. */
2080 m_ATOM,
2082 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2083 instructions. */
2084 ~m_ATOM,
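/* Illustrative sketch (not part of the original file): the per-processor
   masks above are folded into the boolean ix86_tune_features[] array by
   the option-override code, roughly as below, where ix86_tune holds the
   PROCESSOR_* value selected by -mtune.  */

     unsigned int ix86_tune_mask = 1u << ix86_tune;
     unsigned int i;

     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);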
2087 /* Feature tests against the various architecture variations. */
2088 unsigned char ix86_arch_features[X86_ARCH_LAST];
2090 /* Feature tests against the various architecture variations, used to create
2091 ix86_arch_features based on the processor mask. */
2092 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2093 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2094 ~(m_386 | m_486 | m_PENT | m_K6),
2096 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2097 ~m_386,
2099 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2100 ~(m_386 | m_486),
2102 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2103 ~m_386,
2105 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2106 ~m_386,
2109 static const unsigned int x86_accumulate_outgoing_args
2110 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
2111 | m_GENERIC;
2113 static const unsigned int x86_arch_always_fancy_math_387
2114 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
2115 | m_NOCONA | m_CORE2I7 | m_GENERIC;
2117 static enum stringop_alg stringop_alg = no_stringop;
2119 /* If the average insn count for a single function invocation is
2120 lower than this constant, emit fast (but longer) prologue and
2121 epilogue code. */
2122 #define FAST_PROLOGUE_INSN_COUNT 20
2124 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2125 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2126 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2127 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2129 /* Array of the smallest class containing reg number REGNO, indexed by
2130 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2132 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2134 /* ax, dx, cx, bx */
2135 AREG, DREG, CREG, BREG,
2136 /* si, di, bp, sp */
2137 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2138 /* FP registers */
2139 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2140 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2141 /* arg pointer */
2142 NON_Q_REGS,
2143 /* flags, fpsr, fpcr, frame */
2144 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2145 /* SSE registers */
2146 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2147 SSE_REGS, SSE_REGS,
2148 /* MMX registers */
2149 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2150 MMX_REGS, MMX_REGS,
2151 /* REX registers */
2152 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2153 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2154 /* SSE REX registers */
2155 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2156 SSE_REGS, SSE_REGS,
2159 /* The "default" register map used in 32bit mode. */
2161 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2163 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2164 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2165 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2166 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2167 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2168 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2169 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2172 /* The "default" register map used in 64bit mode. */
2174 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2176 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2177 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2178 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2179 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2180 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2181 8,9,10,11,12,13,14,15, /* extended integer registers */
2182 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2185 /* Define the register numbers to be used in Dwarf debugging information.
2186 The SVR4 reference port C compiler uses the following register numbers
2187 in its Dwarf output code:
2188 0 for %eax (gcc regno = 0)
2189 1 for %ecx (gcc regno = 2)
2190 2 for %edx (gcc regno = 1)
2191 3 for %ebx (gcc regno = 3)
2192 4 for %esp (gcc regno = 7)
2193 5 for %ebp (gcc regno = 6)
2194 6 for %esi (gcc regno = 4)
2195 7 for %edi (gcc regno = 5)
2196 The following three DWARF register numbers are never generated by
2197 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2198 believes these numbers have these meanings.
2199 8 for %eip (no gcc equivalent)
2200 9 for %eflags (gcc regno = 17)
2201 10 for %trapno (no gcc equivalent)
2202 It is not at all clear how we should number the FP stack registers
2203 for the x86 architecture. If the version of SDB on x86/svr4 were
2204 a bit less brain dead with respect to floating-point then we would
2205 have a precedent to follow with respect to DWARF register numbers
2206 for x86 FP registers, but the SDB on x86/svr4 is so completely
2207 broken with respect to FP registers that it is hardly worth thinking
2208 of it as something to strive for compatibility with.
2209 The version of x86/svr4 SDB I have at the moment does (partially)
2210 seem to believe that DWARF register number 11 is associated with
2211 the x86 register %st(0), but that's about all. Higher DWARF
2212 register numbers don't seem to be associated with anything in
2213 particular, and even for DWARF regno 11, SDB only seems to under-
2214 stand that it should say that a variable lives in %st(0) (when
2215 asked via an `=' command) if we said it was in DWARF regno 11,
2216 but SDB still prints garbage when asked for the value of the
2217 variable in question (via a `/' command).
2218 (Also note that the labels SDB prints for various FP stack regs
2219 when doing an `x' command are all wrong.)
2220 Note that these problems generally don't affect the native SVR4
2221 C compiler because it doesn't allow the use of -O with -g and
2222 because when it is *not* optimizing, it allocates a memory
2223 location for each floating-point variable, and the memory
2224 location is what gets described in the DWARF AT_location
2225 attribute for the variable in question.
2226 Regardless of the severe mental illness of the x86/svr4 SDB, we
2227 do something sensible here and we use the following DWARF
2228 register numbers. Note that these are all stack-top-relative
2229 numbers.
2230 11 for %st(0) (gcc regno = 8)
2231 12 for %st(1) (gcc regno = 9)
2232 13 for %st(2) (gcc regno = 10)
2233 14 for %st(3) (gcc regno = 11)
2234 15 for %st(4) (gcc regno = 12)
2235 16 for %st(5) (gcc regno = 13)
2236 17 for %st(6) (gcc regno = 14)
2237 18 for %st(7) (gcc regno = 15)
2239 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2241 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2242 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2243 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2244 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2245 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2246 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2247 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
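/* Worked example for the table above (illustrative): the array is indexed
   by gcc regno, so %ecx (gcc regno 2) maps to DWARF regno 1 and %esp
   (gcc regno 7) maps to DWARF regno 4, matching the comment:

     svr4_dbx_register_map[2] == 1
     svr4_dbx_register_map[7] == 4  */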
2250 /* Define parameter passing and return registers. */
2252 static int const x86_64_int_parameter_registers[6] =
2254 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2257 static int const x86_64_ms_abi_int_parameter_registers[4] =
2259 CX_REG, DX_REG, R8_REG, R9_REG
2262 static int const x86_64_int_return_registers[4] =
2264 AX_REG, DX_REG, DI_REG, SI_REG
2267 /* Define the structure for the machine field in struct function. */
2269 struct GTY(()) stack_local_entry {
2270 unsigned short mode;
2271 unsigned short n;
2272 rtx rtl;
2273 struct stack_local_entry *next;
2276 /* Structure describing stack frame layout.
2277 Stack grows downward:
2279 [arguments]
2280 <- ARG_POINTER
2281 saved pc
2283 saved static chain if ix86_static_chain_on_stack
2285 saved frame pointer if frame_pointer_needed
2286 <- HARD_FRAME_POINTER
2287 [saved regs]
2288 <- regs_save_offset
2289 [padding0]
2291 [saved SSE regs]
2292 <- sse_regs_save_offset
2293 [padding1] |
2294 | <- FRAME_POINTER
2295 [va_arg registers] |
2297 [frame] |
2299 [padding2] | = to_allocate
2300 <- STACK_POINTER
2302 struct ix86_frame
2304 int nsseregs;
2305 int nregs;
2306 int va_arg_size;
2307 int red_zone_size;
2308 int outgoing_arguments_size;
2309 HOST_WIDE_INT frame;
2311 /* The offsets relative to ARG_POINTER. */
2312 HOST_WIDE_INT frame_pointer_offset;
2313 HOST_WIDE_INT hard_frame_pointer_offset;
2314 HOST_WIDE_INT stack_pointer_offset;
2315 HOST_WIDE_INT hfp_save_offset;
2316 HOST_WIDE_INT reg_save_offset;
2317 HOST_WIDE_INT sse_reg_save_offset;
2319 /* When save_regs_using_mov is set, emit prologue using
2320 move instead of push instructions. */
2321 bool save_regs_using_mov;
2324 /* Code model option. */
2325 enum cmodel ix86_cmodel;
2326 /* Asm dialect. */
2327 enum asm_dialect ix86_asm_dialect = ASM_ATT;
2328 /* TLS dialects. */
2329 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
2331 /* Which unit we are generating floating point math for. */
2332 enum fpmath_unit ix86_fpmath;
2334 /* Which cpu are we scheduling for. */
2335 enum attr_cpu ix86_schedule;
2337 /* Which cpu are we optimizing for. */
2338 enum processor_type ix86_tune;
2340 /* Which instruction set architecture to use. */
2341 enum processor_type ix86_arch;
2343 /* true if sse prefetch instruction is not NOOP. */
2344 int x86_prefetch_sse;
2346 /* ix86_regparm_string as a number */
2347 static int ix86_regparm;
2349 /* -mstackrealign option */
2350 static const char ix86_force_align_arg_pointer_string[]
2351 = "force_align_arg_pointer";
2353 static rtx (*ix86_gen_leave) (void);
2354 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2355 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2356 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2357 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2358 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2359 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2360 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2361 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2362 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2364 /* Preferred alignment for stack boundary in bits. */
2365 unsigned int ix86_preferred_stack_boundary;
2367 /* Alignment for incoming stack boundary in bits specified at
2368 command line. */
2369 static unsigned int ix86_user_incoming_stack_boundary;
2371 /* Default alignment for incoming stack boundary in bits. */
2372 static unsigned int ix86_default_incoming_stack_boundary;
2374 /* Alignment for incoming stack boundary in bits. */
2375 unsigned int ix86_incoming_stack_boundary;
2377 /* The abi used by target. */
2378 enum calling_abi ix86_abi;
2380 /* Values 1-5: see jump.c */
2381 int ix86_branch_cost;
2383 /* Calling abi specific va_list type nodes. */
2384 static GTY(()) tree sysv_va_list_type_node;
2385 static GTY(()) tree ms_va_list_type_node;
2387 /* Variables which are this size or smaller are put in the data/bss
2388 or ldata/lbss sections. */
2390 int ix86_section_threshold = 65536;
2392 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2393 char internal_label_prefix[16];
2394 int internal_label_prefix_len;
2396 /* Fence to use after loop using movnt. */
2397 tree x86_mfence;
2399 /* Register class used for passing a given 64bit part of the argument.
2400 These represent classes as documented by the psABI, with the exception
2401 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2402 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2404 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2405 whenever possible (i.e. when the upper half contains only padding). */
2406 enum x86_64_reg_class
2408 X86_64_NO_CLASS,
2409 X86_64_INTEGER_CLASS,
2410 X86_64_INTEGERSI_CLASS,
2411 X86_64_SSE_CLASS,
2412 X86_64_SSESF_CLASS,
2413 X86_64_SSEDF_CLASS,
2414 X86_64_SSEUP_CLASS,
2415 X86_64_X87_CLASS,
2416 X86_64_X87UP_CLASS,
2417 X86_64_COMPLEX_X87_CLASS,
2418 X86_64_MEMORY_CLASS
2421 #define MAX_CLASSES 4
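/* Illustrative example (not part of the original file): under the x86-64
   psABI an argument is split into eightbytes, and each eightbyte is given
   one of the classes above, which then selects the register used to pass
   it.  For a hypothetical aggregate like the one below, the expectation
   would be an SSE register for the first eightbyte and a general-purpose
   register for the second.  */

   struct example_arg          /* hypothetical, for illustration only */
   {
     double d;                 /* eightbyte 0: lone double -> X86_64_SSEDF_CLASS */
     int i;                    /* eightbyte 1: 4-byte int plus 4 bytes of padding
                                  -> X86_64_INTEGERSI_CLASS (SImode moves)       */
   };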
2423 /* Table of constants used by fldpi, fldln2, etc.... */
2424 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2425 static bool ext_80387_constants_init = 0;
2428 static struct machine_function * ix86_init_machine_status (void);
2429 static rtx ix86_function_value (const_tree, const_tree, bool);
2430 static bool ix86_function_value_regno_p (const unsigned int);
2431 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2432 const_tree);
2433 static rtx ix86_static_chain (const_tree, bool);
2434 static int ix86_function_regparm (const_tree, const_tree);
2435 static void ix86_compute_frame_layout (struct ix86_frame *);
2436 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2437 rtx, rtx, int);
2438 static void ix86_add_new_builtins (int);
2439 static rtx ix86_expand_vec_perm_builtin (tree);
2440 static tree ix86_canonical_va_list_type (tree);
2441 static void predict_jump (int);
2442 static unsigned int split_stack_prologue_scratch_regno (void);
2443 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2445 enum ix86_function_specific_strings
2447 IX86_FUNCTION_SPECIFIC_ARCH,
2448 IX86_FUNCTION_SPECIFIC_TUNE,
2449 IX86_FUNCTION_SPECIFIC_FPMATH,
2450 IX86_FUNCTION_SPECIFIC_MAX
2453 static char *ix86_target_string (int, int, const char *, const char *,
2454 const char *, bool);
2455 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2456 static void ix86_function_specific_save (struct cl_target_option *);
2457 static void ix86_function_specific_restore (struct cl_target_option *);
2458 static void ix86_function_specific_print (FILE *, int,
2459 struct cl_target_option *);
2460 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2461 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2462 static bool ix86_can_inline_p (tree, tree);
2463 static void ix86_set_current_function (tree);
2464 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2466 static enum calling_abi ix86_function_abi (const_tree);
2469 #ifndef SUBTARGET32_DEFAULT_CPU
2470 #define SUBTARGET32_DEFAULT_CPU "i386"
2471 #endif
2473 /* The svr4 ABI for the i386 says that records and unions are returned
2474 in memory. */
2475 #ifndef DEFAULT_PCC_STRUCT_RETURN
2476 #define DEFAULT_PCC_STRUCT_RETURN 1
2477 #endif
2479 /* Whether -mtune= or -march= were specified */
2480 static int ix86_tune_defaulted;
2481 static int ix86_arch_specified;
2483 /* A mask of ix86_isa_flags that includes bit X if X
2484 was set or cleared on the command line. */
2485 static int ix86_isa_flags_explicit;
2487 /* Define a set of ISAs which are available when a given ISA is
2488 enabled. MMX and SSE ISAs are handled separately. */
2490 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2491 #define OPTION_MASK_ISA_3DNOW_SET \
2492 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2494 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2495 #define OPTION_MASK_ISA_SSE2_SET \
2496 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2497 #define OPTION_MASK_ISA_SSE3_SET \
2498 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2499 #define OPTION_MASK_ISA_SSSE3_SET \
2500 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2501 #define OPTION_MASK_ISA_SSE4_1_SET \
2502 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2503 #define OPTION_MASK_ISA_SSE4_2_SET \
2504 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2505 #define OPTION_MASK_ISA_AVX_SET \
2506 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2507 #define OPTION_MASK_ISA_FMA_SET \
2508 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2510 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
2511 as -msse4.2. */
2512 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2514 #define OPTION_MASK_ISA_SSE4A_SET \
2515 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2516 #define OPTION_MASK_ISA_FMA4_SET \
2517 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2518 | OPTION_MASK_ISA_AVX_SET)
2519 #define OPTION_MASK_ISA_XOP_SET \
2520 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2521 #define OPTION_MASK_ISA_LWP_SET \
2522 OPTION_MASK_ISA_LWP
2524 /* AES and PCLMUL need SSE2 because they use xmm registers */
2525 #define OPTION_MASK_ISA_AES_SET \
2526 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2527 #define OPTION_MASK_ISA_PCLMUL_SET \
2528 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2530 #define OPTION_MASK_ISA_ABM_SET \
2531 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2533 #define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
2534 #define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
2535 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2536 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2537 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2538 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2539 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2541 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2542 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2543 #define OPTION_MASK_ISA_F16C_SET \
2544 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
2546 /* Define a set of ISAs which aren't available when a given ISA is
2547 disabled. MMX and SSE ISAs are handled separately. */
2549 #define OPTION_MASK_ISA_MMX_UNSET \
2550 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2551 #define OPTION_MASK_ISA_3DNOW_UNSET \
2552 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2553 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2555 #define OPTION_MASK_ISA_SSE_UNSET \
2556 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2557 #define OPTION_MASK_ISA_SSE2_UNSET \
2558 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2559 #define OPTION_MASK_ISA_SSE3_UNSET \
2560 (OPTION_MASK_ISA_SSE3 \
2561 | OPTION_MASK_ISA_SSSE3_UNSET \
2562 | OPTION_MASK_ISA_SSE4A_UNSET )
2563 #define OPTION_MASK_ISA_SSSE3_UNSET \
2564 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2565 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2566 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2567 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2568 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2569 #define OPTION_MASK_ISA_AVX_UNSET \
2570 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2571 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2572 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2574 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2575 as -mno-sse4.1. */
2576 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2578 #define OPTION_MASK_ISA_SSE4A_UNSET \
2579 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2581 #define OPTION_MASK_ISA_FMA4_UNSET \
2582 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2583 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2584 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2586 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2587 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2588 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2589 #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
2590 #define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
2591 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2592 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2593 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2594 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2595 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2597 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2598 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2599 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
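/* Sketch of how the paired masks are meant to be used (illustrative; the
   in-tree user is the option handler further below): enabling an ISA ORs
   in its _SET mask, i.e. the ISA plus everything it implies, while
   disabling one clears its _UNSET mask, i.e. the ISA plus everything that
   depends on it.  The function name here is hypothetical.  */

   static void
   example_toggle_sse4_1 (int value)
   {
     if (value)
       /* Also enables SSSE3, SSE3, SSE2 and SSE via the _SET chain.  */
       ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
     else
       /* Also disables SSE4.2, AVX, FMA, ... via the _UNSET chain.  */
       ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
   }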
2601 /* Vectorization library interface and handlers. */
2602 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2604 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2605 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2607 /* Processor target table, indexed by processor number */
2608 struct ptt
2610 const struct processor_costs *cost; /* Processor costs */
2611 const int align_loop; /* Default alignments. */
2612 const int align_loop_max_skip;
2613 const int align_jump;
2614 const int align_jump_max_skip;
2615 const int align_func;
2618 static const struct ptt processor_target_table[PROCESSOR_max] =
2620 {&i386_cost, 4, 3, 4, 3, 4},
2621 {&i486_cost, 16, 15, 16, 15, 16},
2622 {&pentium_cost, 16, 7, 16, 7, 16},
2623 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2624 {&geode_cost, 0, 0, 0, 0, 0},
2625 {&k6_cost, 32, 7, 32, 7, 32},
2626 {&athlon_cost, 16, 7, 16, 7, 16},
2627 {&pentium4_cost, 0, 0, 0, 0, 0},
2628 {&k8_cost, 16, 7, 16, 7, 16},
2629 {&nocona_cost, 0, 0, 0, 0, 0},
2630 /* Core 2 32-bit. */
2631 {&generic32_cost, 16, 10, 16, 10, 16},
2632 /* Core 2 64-bit. */
2633 {&generic64_cost, 16, 10, 16, 10, 16},
2634 /* Core i7 32-bit. */
2635 {&generic32_cost, 16, 10, 16, 10, 16},
2636 /* Core i7 64-bit. */
2637 {&generic64_cost, 16, 10, 16, 10, 16},
2638 {&generic32_cost, 16, 7, 16, 7, 16},
2639 {&generic64_cost, 16, 10, 16, 10, 16},
2640 {&amdfam10_cost, 32, 24, 32, 7, 32},
2641 {&bdver1_cost, 32, 24, 32, 7, 32},
2642 {&btver1_cost, 32, 24, 32, 7, 32},
2643 {&atom_cost, 16, 7, 16, 7, 16}
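/* Hedged sketch (not the exact in-tree statement): during option override
   the -mtune selection indexes this table to pick the active cost model
   and the default alignments; handling of -Os and of the *_max_skip
   fields is omitted here.  */

   ix86_cost = processor_target_table[ix86_tune].cost;
   align_loops = processor_target_table[ix86_tune].align_loop;
   align_jumps = processor_target_table[ix86_tune].align_jump;
   align_functions = processor_target_table[ix86_tune].align_func;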
2646 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2648 "generic",
2649 "i386",
2650 "i486",
2651 "pentium",
2652 "pentium-mmx",
2653 "pentiumpro",
2654 "pentium2",
2655 "pentium3",
2656 "pentium4",
2657 "pentium-m",
2658 "prescott",
2659 "nocona",
2660 "core2",
2661 "corei7",
2662 "atom",
2663 "geode",
2664 "k6",
2665 "k6-2",
2666 "k6-3",
2667 "athlon",
2668 "athlon-4",
2669 "k8",
2670 "amdfam10",
2671 "bdver1",
2672 "btver1"
2675 /* Return true if a red-zone is in use. */
2677 static inline bool
2678 ix86_using_red_zone (void)
2680 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2683 /* Implement TARGET_HANDLE_OPTION. */
2685 static bool
2686 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2688 switch (code)
2690 case OPT_mmmx:
2691 if (value)
2693 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2694 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2696 else
2698 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2699 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2701 return true;
2703 case OPT_m3dnow:
2704 if (value)
2706 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2707 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2709 else
2711 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2712 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2714 return true;
2716 case OPT_m3dnowa:
2717 return false;
2719 case OPT_msse:
2720 if (value)
2722 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2723 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2725 else
2727 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2728 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2730 return true;
2732 case OPT_msse2:
2733 if (value)
2735 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2736 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2738 else
2740 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2741 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2743 return true;
2745 case OPT_msse3:
2746 if (value)
2748 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2749 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2751 else
2753 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2754 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2756 return true;
2758 case OPT_mssse3:
2759 if (value)
2761 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2762 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2764 else
2766 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2767 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2769 return true;
2771 case OPT_msse4_1:
2772 if (value)
2774 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2775 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2777 else
2779 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2780 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2782 return true;
2784 case OPT_msse4_2:
2785 if (value)
2787 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2788 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2790 else
2792 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2793 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2795 return true;
2797 case OPT_mavx:
2798 if (value)
2800 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2801 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2803 else
2805 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2806 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2808 return true;
2810 case OPT_mfma:
2811 if (value)
2813 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2814 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2816 else
2818 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2819 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2821 return true;
2823 case OPT_msse4:
2824 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2825 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2826 return true;
2828 case OPT_mno_sse4:
2829 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2830 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2831 return true;
2833 case OPT_msse4a:
2834 if (value)
2836 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2837 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2839 else
2841 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2842 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2844 return true;
2846 case OPT_mfma4:
2847 if (value)
2849 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2850 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2852 else
2854 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2855 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2857 return true;
2859 case OPT_mxop:
2860 if (value)
2862 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2863 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2865 else
2867 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2868 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2870 return true;
2872 case OPT_mlwp:
2873 if (value)
2875 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2876 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2878 else
2880 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2881 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2883 return true;
2885 case OPT_mabm:
2886 if (value)
2888 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2889 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2891 else
2893 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2894 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2896 return true;
2898 case OPT_mbmi:
2899 if (value)
2901 ix86_isa_flags |= OPTION_MASK_ISA_BMI_SET;
2902 ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_SET;
2904 else
2906 ix86_isa_flags &= ~OPTION_MASK_ISA_BMI_UNSET;
2907 ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_UNSET;
2909 return true;
2911 case OPT_mtbm:
2912 if (value)
2914 ix86_isa_flags |= OPTION_MASK_ISA_TBM_SET;
2915 ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_SET;
2917 else
2919 ix86_isa_flags &= ~OPTION_MASK_ISA_TBM_UNSET;
2920 ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_UNSET;
2922 return true;
2924 case OPT_mpopcnt:
2925 if (value)
2927 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2928 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2930 else
2932 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2933 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2935 return true;
2937 case OPT_msahf:
2938 if (value)
2940 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2941 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2943 else
2945 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2946 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2948 return true;
2950 case OPT_mcx16:
2951 if (value)
2953 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2954 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2956 else
2958 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2959 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2961 return true;
2963 case OPT_mmovbe:
2964 if (value)
2966 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2967 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2969 else
2971 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2972 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2974 return true;
2976 case OPT_mcrc32:
2977 if (value)
2979 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2980 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2982 else
2984 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2985 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2987 return true;
2989 case OPT_maes:
2990 if (value)
2992 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2993 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2995 else
2997 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2998 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
3000 return true;
3002 case OPT_mpclmul:
3003 if (value)
3005 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
3006 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
3008 else
3010 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
3011 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
3013 return true;
3015 case OPT_mfsgsbase:
3016 if (value)
3018 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
3019 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
3021 else
3023 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
3024 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
3026 return true;
3028 case OPT_mrdrnd:
3029 if (value)
3031 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
3032 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
3034 else
3036 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
3037 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
3039 return true;
3041 case OPT_mf16c:
3042 if (value)
3044 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
3045 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
3047 else
3049 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
3050 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
3052 return true;
3054 default:
3055 return true;
3059 /* Return a string that documents the current -m options. The caller is
3060 responsible for freeing the string. */
3062 static char *
3063 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
3064 const char *fpmath, bool add_nl_p)
3066 struct ix86_target_opts
3068 const char *option; /* option string */
3069 int mask; /* isa mask options */
3072 /* This table is ordered so that options like -msse4.2 that imply
3073 preceding options are matched first. */
3074 static struct ix86_target_opts isa_opts[] =
3076 { "-m64", OPTION_MASK_ISA_64BIT },
3077 { "-mfma4", OPTION_MASK_ISA_FMA4 },
3078 { "-mfma", OPTION_MASK_ISA_FMA },
3079 { "-mxop", OPTION_MASK_ISA_XOP },
3080 { "-mlwp", OPTION_MASK_ISA_LWP },
3081 { "-msse4a", OPTION_MASK_ISA_SSE4A },
3082 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
3083 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
3084 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
3085 { "-msse3", OPTION_MASK_ISA_SSE3 },
3086 { "-msse2", OPTION_MASK_ISA_SSE2 },
3087 { "-msse", OPTION_MASK_ISA_SSE },
3088 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
3089 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
3090 { "-mmmx", OPTION_MASK_ISA_MMX },
3091 { "-mabm", OPTION_MASK_ISA_ABM },
3092 { "-mbmi", OPTION_MASK_ISA_BMI },
3093 { "-mtbm", OPTION_MASK_ISA_TBM },
3094 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
3095 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
3096 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
3097 { "-maes", OPTION_MASK_ISA_AES },
3098 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
3099 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
3100 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
3101 { "-mf16c", OPTION_MASK_ISA_F16C },
3104 /* Flag options. */
3105 static struct ix86_target_opts flag_opts[] =
3107 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
3108 { "-m80387", MASK_80387 },
3109 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
3110 { "-malign-double", MASK_ALIGN_DOUBLE },
3111 { "-mcld", MASK_CLD },
3112 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
3113 { "-mieee-fp", MASK_IEEE_FP },
3114 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
3115 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
3116 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
3117 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
3118 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
3119 { "-mno-push-args", MASK_NO_PUSH_ARGS },
3120 { "-mno-red-zone", MASK_NO_RED_ZONE },
3121 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
3122 { "-mrecip", MASK_RECIP },
3123 { "-mrtd", MASK_RTD },
3124 { "-msseregparm", MASK_SSEREGPARM },
3125 { "-mstack-arg-probe", MASK_STACK_PROBE },
3126 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
3127 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
3128 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
3129 { "-mvzeroupper", MASK_VZEROUPPER },
3132 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
3134 char isa_other[40];
3135 char target_other[40];
3136 unsigned num = 0;
3137 unsigned i, j;
3138 char *ret;
3139 char *ptr;
3140 size_t len;
3141 size_t line_len;
3142 size_t sep_len;
3144 memset (opts, '\0', sizeof (opts));
3146 /* Add -march= option. */
3147 if (arch)
3149 opts[num][0] = "-march=";
3150 opts[num++][1] = arch;
3153 /* Add -mtune= option. */
3154 if (tune)
3156 opts[num][0] = "-mtune=";
3157 opts[num++][1] = tune;
3160 /* Pick out the options in isa options. */
3161 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
3163 if ((isa & isa_opts[i].mask) != 0)
3165 opts[num++][0] = isa_opts[i].option;
3166 isa &= ~ isa_opts[i].mask;
3170 if (isa && add_nl_p)
3172 opts[num++][0] = isa_other;
3173 sprintf (isa_other, "(other isa: %#x)", isa);
3176 /* Add flag options. */
3177 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
3179 if ((flags & flag_opts[i].mask) != 0)
3181 opts[num++][0] = flag_opts[i].option;
3182 flags &= ~ flag_opts[i].mask;
3186 if (flags && add_nl_p)
3188 opts[num++][0] = target_other;
3189 sprintf (target_other, "(other flags: %#x)", flags);
3192 /* Add -fpmath= option. */
3193 if (fpmath)
3195 opts[num][0] = "-mfpmath=";
3196 opts[num++][1] = fpmath;
3199 /* Any options? */
3200 if (num == 0)
3201 return NULL;
3203 gcc_assert (num < ARRAY_SIZE (opts));
3205 /* Size the string. */
3206 len = 0;
3207 sep_len = (add_nl_p) ? 3 : 1;
3208 for (i = 0; i < num; i++)
3210 len += sep_len;
3211 for (j = 0; j < 2; j++)
3212 if (opts[i][j])
3213 len += strlen (opts[i][j]);
3216 /* Build the string. */
3217 ret = ptr = (char *) xmalloc (len);
3218 line_len = 0;
3220 for (i = 0; i < num; i++)
3222 size_t len2[2];
3224 for (j = 0; j < 2; j++)
3225 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3227 if (i != 0)
3229 *ptr++ = ' ';
3230 line_len++;
3232 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3234 *ptr++ = '\\';
3235 *ptr++ = '\n';
3236 line_len = 0;
3240 for (j = 0; j < 2; j++)
3241 if (opts[i][j])
3243 memcpy (ptr, opts[i][j], len2[j]);
3244 ptr += len2[j];
3245 line_len += len2[j];
3249 *ptr = '\0';
3250 gcc_assert (ret + len >= ptr);
3252 return ret;
3255 /* Return TRUE if software prefetching is beneficial for the
3256 given CPU. */
3258 static bool
3259 software_prefetching_beneficial_p (void)
3261 switch (ix86_tune)
3263 case PROCESSOR_GEODE:
3264 case PROCESSOR_K6:
3265 case PROCESSOR_ATHLON:
3266 case PROCESSOR_K8:
3267 case PROCESSOR_AMDFAM10:
3268 case PROCESSOR_BTVER1:
3269 return true;
3271 default:
3272 return false;
3276 /* Return true if profiling code should be emitted before the
3277 prologue; otherwise return false.
3278 Note: for x86 this is the case when -mfentry ("hotfix"-style patching) is used. */
3279 static bool
3280 ix86_profile_before_prologue (void)
3282 return flag_fentry != 0;
3285 /* Function that is callable from the debugger to print the current
3286 options. */
3287 void
3288 ix86_debug_options (void)
3290 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3291 ix86_arch_string, ix86_tune_string,
3292 ix86_fpmath_string, true);
3294 if (opts)
3296 fprintf (stderr, "%s\n\n", opts);
3297 free (opts);
3299 else
3300 fputs ("<no options>\n\n", stderr);
3302 return;
3305 /* Override various settings based on options. If MAIN_ARGS_P, the
3306 options are from the command line, otherwise they are from
3307 attributes. */
3309 static void
3310 ix86_option_override_internal (bool main_args_p)
3312 int i;
3313 unsigned int ix86_arch_mask, ix86_tune_mask;
3314 const bool ix86_tune_specified = (ix86_tune_string != NULL);
3315 const char *prefix;
3316 const char *suffix;
3317 const char *sw;
3319 /* Comes from final.c -- no real reason to change it. */
3320 #define MAX_CODE_ALIGN 16
3322 enum pta_flags
3324 PTA_SSE = 1 << 0,
3325 PTA_SSE2 = 1 << 1,
3326 PTA_SSE3 = 1 << 2,
3327 PTA_MMX = 1 << 3,
3328 PTA_PREFETCH_SSE = 1 << 4,
3329 PTA_3DNOW = 1 << 5,
3330 PTA_3DNOW_A = 1 << 6,
3331 PTA_64BIT = 1 << 7,
3332 PTA_SSSE3 = 1 << 8,
3333 PTA_CX16 = 1 << 9,
3334 PTA_POPCNT = 1 << 10,
3335 PTA_ABM = 1 << 11,
3336 PTA_SSE4A = 1 << 12,
3337 PTA_NO_SAHF = 1 << 13,
3338 PTA_SSE4_1 = 1 << 14,
3339 PTA_SSE4_2 = 1 << 15,
3340 PTA_AES = 1 << 16,
3341 PTA_PCLMUL = 1 << 17,
3342 PTA_AVX = 1 << 18,
3343 PTA_FMA = 1 << 19,
3344 PTA_MOVBE = 1 << 20,
3345 PTA_FMA4 = 1 << 21,
3346 PTA_XOP = 1 << 22,
3347 PTA_LWP = 1 << 23,
3348 PTA_FSGSBASE = 1 << 24,
3349 PTA_RDRND = 1 << 25,
3350 PTA_F16C = 1 << 26,
3351 PTA_BMI = 1 << 27,
3352 PTA_TBM = 1 << 28
3353 /* if this reaches 32, need to widen struct pta flags below */
3356 static struct pta
3358 const char *const name; /* processor name or nickname. */
3359 const enum processor_type processor;
3360 const enum attr_cpu schedule;
3361 const unsigned /*enum pta_flags*/ flags;
3363 const processor_alias_table[] =
3365 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3366 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3367 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3368 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3369 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3370 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3371 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3372 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3373 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
3374 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3375 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3376 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
3377 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3378 PTA_MMX | PTA_SSE},
3379 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3380 PTA_MMX | PTA_SSE},
3381 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3382 PTA_MMX | PTA_SSE | PTA_SSE2},
3383 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3384 PTA_MMX | PTA_SSE | PTA_SSE2},
3385 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3386 PTA_MMX | PTA_SSE | PTA_SSE2},
3387 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3388 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
3389 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3390 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3391 | PTA_CX16 | PTA_NO_SAHF},
3392 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
3393 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3394 | PTA_SSSE3 | PTA_CX16},
3395 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
3396 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3397 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
3398 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
3399 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3400 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
3401 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
3402 {"atom", PROCESSOR_ATOM, CPU_ATOM,
3403 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3404 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
3405 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3406 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3407 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3408 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3409 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3410 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3411 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3412 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3413 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3414 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3415 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3416 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3417 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3418 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3419 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3420 {"x86-64", PROCESSOR_K8, CPU_K8,
3421 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3422 {"k8", PROCESSOR_K8, CPU_K8,
3423 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3424 | PTA_SSE2 | PTA_NO_SAHF},
3425 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3426 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3427 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3428 {"opteron", PROCESSOR_K8, CPU_K8,
3429 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3430 | PTA_SSE2 | PTA_NO_SAHF},
3431 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3432 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3433 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3434 {"athlon64", PROCESSOR_K8, CPU_K8,
3435 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3436 | PTA_SSE2 | PTA_NO_SAHF},
3437 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3438 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3439 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3440 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3441 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3442 | PTA_SSE2 | PTA_NO_SAHF},
3443 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3444 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3445 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3446 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3447 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3448 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3449 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3450 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3451 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3452 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3453 | PTA_XOP | PTA_LWP},
3454 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3455 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3456 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
3457 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3458 0 /* flags are only used for -march switch. */ },
3459 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3460 PTA_64BIT /* flags are only used for -march switch. */ },
3463 int const pta_size = ARRAY_SIZE (processor_alias_table);
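   /* Reading the table above: each -march entry selects both a scheduling
      model and a set of implied ISA flags; e.g. "core2" selects
      PROCESSOR_CORE2_64 / CPU_CORE2 and implies MMX, SSE, SSE2, SSE3,
      SSSE3 and CX16.  */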
3465 /* Set up prefix/suffix so the error messages refer to either the command
3466 line argument, or the attribute(target). */
3467 if (main_args_p)
3469 prefix = "-m";
3470 suffix = "";
3471 sw = "switch";
3473 else
3475 prefix = "option(\"";
3476 suffix = "\")";
3477 sw = "attribute";
3480 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3481 SUBTARGET_OVERRIDE_OPTIONS;
3482 #endif
3484 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3485 SUBSUBTARGET_OVERRIDE_OPTIONS;
3486 #endif
3488 /* -fPIC is the default for x86_64. */
3489 if (TARGET_MACHO && TARGET_64BIT)
3490 flag_pic = 2;
3492 /* Need to check -mtune=generic first. */
3493 if (ix86_tune_string)
3495 if (!strcmp (ix86_tune_string, "generic")
3496 || !strcmp (ix86_tune_string, "i686")
3497 /* As special support for cross compilers we read -mtune=native
3498 as -mtune=generic. With native compilers we won't see the
3499 -mtune=native, as it was changed by the driver. */
3500 || !strcmp (ix86_tune_string, "native"))
3502 if (TARGET_64BIT)
3503 ix86_tune_string = "generic64";
3504 else
3505 ix86_tune_string = "generic32";
3507 /* If this call is for setting the option attribute, allow the
3508 generic32/generic64 that was previously set. */
3509 else if (!main_args_p
3510 && (!strcmp (ix86_tune_string, "generic32")
3511 || !strcmp (ix86_tune_string, "generic64")))
3513 else if (!strncmp (ix86_tune_string, "generic", 7))
3514 error ("bad value (%s) for %stune=%s %s",
3515 ix86_tune_string, prefix, suffix, sw);
3516 else if (!strcmp (ix86_tune_string, "x86-64"))
3517 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3518 "%stune=k8%s or %stune=generic%s instead as appropriate",
3519 prefix, suffix, prefix, suffix, prefix, suffix);
3521 else
3523 if (ix86_arch_string)
3524 ix86_tune_string = ix86_arch_string;
3525 if (!ix86_tune_string)
3527 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3528 ix86_tune_defaulted = 1;
3531 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3532 need to use a sensible tune option. */
3533 if (!strcmp (ix86_tune_string, "generic")
3534 || !strcmp (ix86_tune_string, "x86-64")
3535 || !strcmp (ix86_tune_string, "i686"))
3537 if (TARGET_64BIT)
3538 ix86_tune_string = "generic64";
3539 else
3540 ix86_tune_string = "generic32";
3544 if (ix86_stringop_string)
3546 if (!strcmp (ix86_stringop_string, "rep_byte"))
3547 stringop_alg = rep_prefix_1_byte;
3548 else if (!strcmp (ix86_stringop_string, "libcall"))
3549 stringop_alg = libcall;
3550 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3551 stringop_alg = rep_prefix_4_byte;
3552 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3553 && TARGET_64BIT)
3554 /* rep; movq isn't available in 32-bit code. */
3555 stringop_alg = rep_prefix_8_byte;
3556 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3557 stringop_alg = loop_1_byte;
3558 else if (!strcmp (ix86_stringop_string, "loop"))
3559 stringop_alg = loop;
3560 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3561 stringop_alg = unrolled_loop;
3562 else
3563 error ("bad value (%s) for %sstringop-strategy=%s %s",
3564 ix86_stringop_string, prefix, suffix, sw);
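   /* Example: -mstringop-strategy=rep_8byte selects rep_prefix_8_byte and
      is accepted only in 64-bit mode, since "rep; movq" is not available
      in 32-bit code (see the TARGET_64BIT check above).  */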
3567 if (!ix86_arch_string)
3568 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3569 else
3570 ix86_arch_specified = 1;
3572 /* Validate -mabi= value. */
3573 if (ix86_abi_string)
3575 if (strcmp (ix86_abi_string, "sysv") == 0)
3576 ix86_abi = SYSV_ABI;
3577 else if (strcmp (ix86_abi_string, "ms") == 0)
3578 ix86_abi = MS_ABI;
3579 else
3580 error ("unknown ABI (%s) for %sabi=%s %s",
3581 ix86_abi_string, prefix, suffix, sw);
3583 else
3584 ix86_abi = DEFAULT_ABI;
3586 if (ix86_cmodel_string != 0)
3588 if (!strcmp (ix86_cmodel_string, "small"))
3589 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3590 else if (!strcmp (ix86_cmodel_string, "medium"))
3591 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3592 else if (!strcmp (ix86_cmodel_string, "large"))
3593 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3594 else if (flag_pic)
3595 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3596 else if (!strcmp (ix86_cmodel_string, "32"))
3597 ix86_cmodel = CM_32;
3598 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3599 ix86_cmodel = CM_KERNEL;
3600 else
3601 error ("bad value (%s) for %scmodel=%s %s",
3602 ix86_cmodel_string, prefix, suffix, sw);
3604 else
3606 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3607 use of rip-relative addressing. This eliminates fixups that
3608 would otherwise be needed if this object is to be placed in a
3609 DLL, and is essentially just as efficient as direct addressing. */
3610 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3611 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3612 else if (TARGET_64BIT)
3613 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3614 else
3615 ix86_cmodel = CM_32;
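   /* Summary of the defaults chosen above: 64-bit MS-ABI targets get
      CM_SMALL_PIC with PIC forced on, other 64-bit targets get CM_SMALL
      (or CM_SMALL_PIC under -fpic), and 32-bit targets get CM_32.  */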
3617 if (ix86_asm_string != 0)
3619 if (! TARGET_MACHO
3620 && !strcmp (ix86_asm_string, "intel"))
3621 ix86_asm_dialect = ASM_INTEL;
3622 else if (!strcmp (ix86_asm_string, "att"))
3623 ix86_asm_dialect = ASM_ATT;
3624 else
3625 error ("bad value (%s) for %sasm=%s %s",
3626 ix86_asm_string, prefix, suffix, sw);
3628 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3629 error ("code model %qs not supported in the %s bit mode",
3630 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3631 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3632 sorry ("%i-bit mode not compiled in",
3633 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3635 for (i = 0; i < pta_size; i++)
3636 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3638 ix86_schedule = processor_alias_table[i].schedule;
3639 ix86_arch = processor_alias_table[i].processor;
3640 /* Default cpu tuning to the architecture. */
3641 ix86_tune = ix86_arch;
3643 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3644 error ("CPU you selected does not support x86-64 "
3645 "instruction set");
3647 if (processor_alias_table[i].flags & PTA_MMX
3648 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3649 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3650 if (processor_alias_table[i].flags & PTA_3DNOW
3651 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3652 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3653 if (processor_alias_table[i].flags & PTA_3DNOW_A
3654 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3655 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3656 if (processor_alias_table[i].flags & PTA_SSE
3657 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3658 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3659 if (processor_alias_table[i].flags & PTA_SSE2
3660 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3661 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3662 if (processor_alias_table[i].flags & PTA_SSE3
3663 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3664 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3665 if (processor_alias_table[i].flags & PTA_SSSE3
3666 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3667 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3668 if (processor_alias_table[i].flags & PTA_SSE4_1
3669 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3670 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3671 if (processor_alias_table[i].flags & PTA_SSE4_2
3672 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3673 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3674 if (processor_alias_table[i].flags & PTA_AVX
3675 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3676 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3677 if (processor_alias_table[i].flags & PTA_FMA
3678 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3679 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3680 if (processor_alias_table[i].flags & PTA_SSE4A
3681 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3682 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3683 if (processor_alias_table[i].flags & PTA_FMA4
3684 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3685 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3686 if (processor_alias_table[i].flags & PTA_XOP
3687 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3688 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3689 if (processor_alias_table[i].flags & PTA_LWP
3690 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3691 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3692 if (processor_alias_table[i].flags & PTA_ABM
3693 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3694 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3695 if (processor_alias_table[i].flags & PTA_BMI
3696 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3697 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3698 if (processor_alias_table[i].flags & PTA_TBM
3699 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3700 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3701 if (processor_alias_table[i].flags & PTA_CX16
3702 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3703 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3704 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3705 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3706 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3707 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3708 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3709 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3710 if (processor_alias_table[i].flags & PTA_MOVBE
3711 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3712 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3713 if (processor_alias_table[i].flags & PTA_AES
3714 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3715 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3716 if (processor_alias_table[i].flags & PTA_PCLMUL
3717 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3718 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3719 if (processor_alias_table[i].flags & PTA_FSGSBASE
3720 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3721 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3722 if (processor_alias_table[i].flags & PTA_RDRND
3723 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3724 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3725 if (processor_alias_table[i].flags & PTA_F16C
3726 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3727 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3728 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3729 x86_prefetch_sse = true;
3731 break;
3734 if (!strcmp (ix86_arch_string, "generic"))
3735 error ("generic CPU can be used only for %stune=%s %s",
3736 prefix, suffix, sw);
3737 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3738 error ("bad value (%s) for %sarch=%s %s",
3739 ix86_arch_string, prefix, suffix, sw);
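   /* Example of the mapping in the loop above: -march=atom selects
      PROCESSOR_ATOM with the CPU_ATOM scheduling model and, unless the
      user explicitly disabled them, enables MMX, SSE, SSE2, SSE3, SSSE3,
      CX16 and MOVBE.  */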
3741 ix86_arch_mask = 1u << ix86_arch;
3742 for (i = 0; i < X86_ARCH_LAST; ++i)
3743 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3745 for (i = 0; i < pta_size; i++)
3746 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3748 ix86_schedule = processor_alias_table[i].schedule;
3749 ix86_tune = processor_alias_table[i].processor;
3750 if (TARGET_64BIT)
3752 if (!(processor_alias_table[i].flags & PTA_64BIT))
3754 if (ix86_tune_defaulted)
3756 ix86_tune_string = "x86-64";
3757 for (i = 0; i < pta_size; i++)
3758 if (! strcmp (ix86_tune_string,
3759 processor_alias_table[i].name))
3760 break;
3761 ix86_schedule = processor_alias_table[i].schedule;
3762 ix86_tune = processor_alias_table[i].processor;
3764 else
3765 error ("CPU you selected does not support x86-64 "
3766 "instruction set");
3769 else
3771 /* Adjust tuning when compiling for 32-bit ABI. */
3772 switch (ix86_tune)
3774 case PROCESSOR_GENERIC64:
3775 ix86_tune = PROCESSOR_GENERIC32;
3776 ix86_schedule = CPU_PENTIUMPRO;
3777 break;
3779 case PROCESSOR_CORE2_64:
3780 ix86_tune = PROCESSOR_CORE2_32;
3781 break;
3783 case PROCESSOR_COREI7_64:
3784 ix86_tune = PROCESSOR_COREI7_32;
3785 break;
3787 default:
3788 break;
3791 /* Intel CPUs have always interpreted SSE prefetch instructions as
3792 NOPs; so, we can enable SSE prefetch instructions even when
3793 -mtune (rather than -march) points us to a processor that has them.
3794 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3795 higher processors. */
3796 if (TARGET_CMOVE
3797 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3798 x86_prefetch_sse = true;
3799 break;
3802 if (ix86_tune_specified && i == pta_size)
3803 error ("bad value (%s) for %stune=%s %s",
3804 ix86_tune_string, prefix, suffix, sw);
3806 ix86_tune_mask = 1u << ix86_tune;
3807 for (i = 0; i < X86_TUNE_LAST; ++i)
3808 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3810 #ifndef USE_IX86_FRAME_POINTER
3811 #define USE_IX86_FRAME_POINTER 0
3812 #endif
3814 #ifndef USE_X86_64_FRAME_POINTER
3815 #define USE_X86_64_FRAME_POINTER 0
3816 #endif
3818 /* Set the default values for switches whose default depends on TARGET_64BIT
3819 in case they weren't overwritten by command line options. */
3820 if (TARGET_64BIT)
3822 if (optimize > 1 && !global_options_set.x_flag_zee)
3823 flag_zee = 1;
3824 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3825 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3826 if (flag_asynchronous_unwind_tables == 2)
3827 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3828 if (flag_pcc_struct_return == 2)
3829 flag_pcc_struct_return = 0;
3831 else
3833 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3834 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3835 if (flag_asynchronous_unwind_tables == 2)
3836 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3837 if (flag_pcc_struct_return == 2)
3838 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3841 if (optimize_size)
3842 ix86_cost = &ix86_size_cost;
3843 else
3844 ix86_cost = processor_target_table[ix86_tune].cost;
3846 /* Arrange to set up i386_stack_locals for all functions. */
3847 init_machine_status = ix86_init_machine_status;
3849 /* Validate -mregparm= value. */
3850 if (ix86_regparm_string)
3852 if (TARGET_64BIT)
3853 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3854 i = atoi (ix86_regparm_string);
3855 if (i < 0 || i > REGPARM_MAX)
3856 error ("%sregparm=%d%s is not between 0 and %d",
3857 prefix, i, suffix, REGPARM_MAX);
3858 else
3859 ix86_regparm = i;
3861 if (TARGET_64BIT)
3862 ix86_regparm = REGPARM_MAX;
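   /* Example: -mregparm=3 lets the 32-bit ABI pass up to three integer
      arguments in registers; in 64-bit mode the option is ignored and
      ix86_regparm is simply set to REGPARM_MAX.  */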
3864 /* If the user has provided any of the -malign-* options,
3865 warn and use that value only if -falign-* is not set.
3866 Remove this code in GCC 3.2 or later. */
3867 if (ix86_align_loops_string)
3869 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3870 prefix, suffix, suffix);
3871 if (align_loops == 0)
3873 i = atoi (ix86_align_loops_string);
3874 if (i < 0 || i > MAX_CODE_ALIGN)
3875 error ("%salign-loops=%d%s is not between 0 and %d",
3876 prefix, i, suffix, MAX_CODE_ALIGN);
3877 else
3878 align_loops = 1 << i;
3882 if (ix86_align_jumps_string)
3884 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3885 prefix, suffix, suffix);
3886 if (align_jumps == 0)
3888 i = atoi (ix86_align_jumps_string);
3889 if (i < 0 || i > MAX_CODE_ALIGN)
3890 error ("%salign-jumps=%d%s is not between 0 and %d",
3891 prefix, i, suffix, MAX_CODE_ALIGN);
3892 else
3893 align_jumps = 1 << i;
3897 if (ix86_align_funcs_string)
3899 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3900 prefix, suffix, suffix);
3901 if (align_functions == 0)
3903 i = atoi (ix86_align_funcs_string);
3904 if (i < 0 || i > MAX_CODE_ALIGN)
3905 error ("%salign-functions=%d%s is not between 0 and %d",
3906 prefix, i, suffix, MAX_CODE_ALIGN);
3907 else
3908 align_functions = 1 << i;
3912 /* Default align_* from the processor table. */
3913 if (align_loops == 0)
3915 align_loops = processor_target_table[ix86_tune].align_loop;
3916 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3918 if (align_jumps == 0)
3920 align_jumps = processor_target_table[ix86_tune].align_jump;
3921 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3923 if (align_functions == 0)
3925 align_functions = processor_target_table[ix86_tune].align_func;
3928 /* Validate -mbranch-cost= value, or provide default. */
3929 ix86_branch_cost = ix86_cost->branch_cost;
3930 if (ix86_branch_cost_string)
3932 i = atoi (ix86_branch_cost_string);
3933 if (i < 0 || i > 5)
3934 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3935 else
3936 ix86_branch_cost = i;
3938 if (ix86_section_threshold_string)
3940 i = atoi (ix86_section_threshold_string);
3941 if (i < 0)
3942 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3943 else
3944 ix86_section_threshold = i;
3947 if (ix86_tls_dialect_string)
3949 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3950 ix86_tls_dialect = TLS_DIALECT_GNU;
3951 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3952 ix86_tls_dialect = TLS_DIALECT_GNU2;
3953 else
3954 error ("bad value (%s) for %stls-dialect=%s %s",
3955 ix86_tls_dialect_string, prefix, suffix, sw);
3958 if (ix87_precision_string)
3960 i = atoi (ix87_precision_string);
3961 if (i != 32 && i != 64 && i != 80)
3962 error ("pc%d is not a valid precision setting (32, 64 or 80)", i);
3965 if (TARGET_64BIT)
3967 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3969 /* Enable by default the SSE and MMX builtins. Do allow the user to
3970 explicitly disable any of these. In particular, disabling SSE and
3971 MMX for kernel code is extremely useful. */
3972 if (!ix86_arch_specified)
3973 ix86_isa_flags
3974 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3975 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3977 if (TARGET_RTD)
3978 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3980 else
3982 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3984 if (!ix86_arch_specified)
3985 ix86_isa_flags
3986 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3988 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3989 when the programmer takes care to keep the stack from being destroyed. */
3990 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3991 target_flags |= MASK_NO_RED_ZONE;
3994 /* Keep nonleaf frame pointers. */
3995 if (flag_omit_frame_pointer)
3996 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3997 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3998 flag_omit_frame_pointer = 1;
4000 /* If we're doing fast math, we don't care about comparison order
4001 wrt NaNs. This lets us use a shorter comparison sequence. */
4002 if (flag_finite_math_only)
4003 target_flags &= ~MASK_IEEE_FP;
4005 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
4006 since the insns won't need emulation. */
4007 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
4008 target_flags &= ~MASK_NO_FANCY_MATH_387;
4010 /* Likewise, if the target doesn't have a 387, or we've specified
4011 software floating point, don't use 387 inline intrinsics. */
4012 if (!TARGET_80387)
4013 target_flags |= MASK_NO_FANCY_MATH_387;
4015 /* Turn on MMX builtins for -msse. */
4016 if (TARGET_SSE)
4018 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
4019 x86_prefetch_sse = true;
4022 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
4023 if (TARGET_SSE4_2 || TARGET_ABM)
4024 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
4026 /* Validate -mpreferred-stack-boundary= value or default it to
4027 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4028 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4029 if (ix86_preferred_stack_boundary_string)
4031 int min = (TARGET_64BIT ? 4 : 2);
4032 int max = (TARGET_SEH ? 4 : 12);
4034 i = atoi (ix86_preferred_stack_boundary_string);
4035 if (i < min || i > max)
4037 if (min == max)
4038 error ("%spreferred-stack-boundary%s is not supported "
4039 "for this target", prefix, suffix);
4040 else
4041 error ("%spreferred-stack-boundary=%d%s is not between %d and %d",
4042 prefix, i, suffix, min, max);
4044 else
4045 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
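   /* Worked example: -mpreferred-stack-boundary=4 yields
      (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. a 16-byte stack
      alignment, since BITS_PER_UNIT is 8 on this target.  */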
4048 /* Set the default value for -mstackrealign. */
4049 if (ix86_force_align_arg_pointer == -1)
4050 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4052 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4054 /* Validate -mincoming-stack-boundary= value or default it to
4055 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4056 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4057 if (ix86_incoming_stack_boundary_string)
4059 i = atoi (ix86_incoming_stack_boundary_string);
4060 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
4061 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4062 i, TARGET_64BIT ? 4 : 2);
4063 else
4065 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
4066 ix86_incoming_stack_boundary
4067 = ix86_user_incoming_stack_boundary;
4071 /* Accept -msseregparm only if at least SSE support is enabled. */
4072 if (TARGET_SSEREGPARM
4073 && ! TARGET_SSE)
4074 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4076 ix86_fpmath = TARGET_FPMATH_DEFAULT;
4077 if (ix86_fpmath_string != 0)
4079 if (! strcmp (ix86_fpmath_string, "387"))
4080 ix86_fpmath = FPMATH_387;
4081 else if (! strcmp (ix86_fpmath_string, "sse"))
4083 if (!TARGET_SSE)
4085 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4086 ix86_fpmath = FPMATH_387;
4088 else
4089 ix86_fpmath = FPMATH_SSE;
4091 else if (! strcmp (ix86_fpmath_string, "387,sse")
4092 || ! strcmp (ix86_fpmath_string, "387+sse")
4093 || ! strcmp (ix86_fpmath_string, "sse,387")
4094 || ! strcmp (ix86_fpmath_string, "sse+387")
4095 || ! strcmp (ix86_fpmath_string, "both"))
4097 if (!TARGET_SSE)
4099 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4100 ix86_fpmath = FPMATH_387;
4102 else if (!TARGET_80387)
4104 warning (0, "387 instruction set disabled, using SSE arithmetics");
4105 ix86_fpmath = FPMATH_SSE;
4107 else
4108 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4110 else
4111 error ("bad value (%s) for %sfpmath=%s %s",
4112 ix86_fpmath_string, prefix, suffix, sw);
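   /* Examples: -mfpmath=sse selects FPMATH_SSE (falling back to 387 with
      a warning if SSE is disabled), while -mfpmath=sse,387 or
      -mfpmath=both enables both units when the hardware allows it.  */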
4115 /* If the i387 is disabled, then do not return values in it. */
4116 if (!TARGET_80387)
4117 target_flags &= ~MASK_FLOAT_RETURNS;
4119 /* Use external vectorized library in vectorizing intrinsics. */
4120 if (ix86_veclibabi_string)
4122 if (strcmp (ix86_veclibabi_string, "svml") == 0)
4123 ix86_veclib_handler = ix86_veclibabi_svml;
4124 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
4125 ix86_veclib_handler = ix86_veclibabi_acml;
4126 else
4127 error ("unknown vectorization library ABI type (%s) for "
4128 "%sveclibabi=%s %s", ix86_veclibabi_string,
4129 prefix, suffix, sw);
4132 if ((!USE_IX86_FRAME_POINTER
4133 || (x86_accumulate_outgoing_args & ix86_tune_mask))
4134 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4135 && !optimize_size)
4136 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4138 /* ??? Unwind info is not correct around the CFG unless either a frame
4139 pointer is present or M_A_O_A is set. Fixing this requires rewriting
4140 unwind info generation to be aware of the CFG and propagating states
4141 around edges. */
4142 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
4143 || flag_exceptions || flag_non_call_exceptions)
4144 && flag_omit_frame_pointer
4145 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4147 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4148 warning (0, "unwind tables currently require either a frame pointer "
4149 "or %saccumulate-outgoing-args%s for correctness",
4150 prefix, suffix);
4151 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4154 /* If stack probes are required, the space used for large function
4155 arguments on the stack must also be probed, so enable
4156 -maccumulate-outgoing-args so this happens in the prologue. */
4157 if (TARGET_STACK_PROBE
4158 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4160 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4161 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4162 "for correctness", prefix, suffix);
4163 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4166 /* For sane SSE instruction set generation we need the fcomi instruction.
4167 It is safe to enable all CMOVE instructions. */
4168 if (TARGET_SSE)
4169 TARGET_CMOVE = 1;
4171 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4173 char *p;
4174 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4175 p = strchr (internal_label_prefix, 'X');
4176 internal_label_prefix_len = p - internal_label_prefix;
4177 *p = '\0';
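   /* The trick above: emit a dummy internal label containing the letter
      'X' and treat everything before the 'X' as the target's
      internal-label prefix; the exact prefix depends on
      ASM_GENERATE_INTERNAL_LABEL.  */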
4180 /* When a scheduling description is not available, disable the scheduler pass
4181 so it won't slow down the compilation and make x87 code slower. */
4182 if (!TARGET_SCHEDULE)
4183 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
4185 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4186 ix86_cost->simultaneous_prefetches,
4187 global_options.x_param_values,
4188 global_options_set.x_param_values);
4189 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
4190 global_options.x_param_values,
4191 global_options_set.x_param_values);
4192 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
4193 global_options.x_param_values,
4194 global_options_set.x_param_values);
4195 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
4196 global_options.x_param_values,
4197 global_options_set.x_param_values);
4199 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4200 if (flag_prefetch_loop_arrays < 0
4201 && HAVE_prefetch
4202 && optimize >= 3
4203 && software_prefetching_beneficial_p ())
4204 flag_prefetch_loop_arrays = 1;
4206 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4207 can be optimized to ap = __builtin_next_arg (0). */
4208 if (!TARGET_64BIT && !flag_split_stack)
4209 targetm.expand_builtin_va_start = NULL;
4211 if (TARGET_64BIT)
4213 ix86_gen_leave = gen_leave_rex64;
4214 ix86_gen_add3 = gen_adddi3;
4215 ix86_gen_sub3 = gen_subdi3;
4216 ix86_gen_sub3_carry = gen_subdi3_carry;
4217 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4218 ix86_gen_monitor = gen_sse3_monitor64;
4219 ix86_gen_andsp = gen_anddi3;
4220 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4221 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4222 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4224 else
4226 ix86_gen_leave = gen_leave;
4227 ix86_gen_add3 = gen_addsi3;
4228 ix86_gen_sub3 = gen_subsi3;
4229 ix86_gen_sub3_carry = gen_subsi3_carry;
4230 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4231 ix86_gen_monitor = gen_sse3_monitor;
4232 ix86_gen_andsp = gen_andsi3;
4233 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4234 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4235 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4238 #ifdef USE_IX86_CLD
4239 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4240 if (!TARGET_64BIT)
4241 target_flags |= MASK_CLD & ~target_flags_explicit;
4242 #endif
4244 if (!TARGET_64BIT && flag_pic)
4246 if (flag_fentry > 0)
4247 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4248 "with -fpic");
4249 flag_fentry = 0;
4251 else if (TARGET_SEH)
4253 if (flag_fentry == 0)
4254 sorry ("-mno-fentry isn%'t compatible with SEH");
4255 flag_fentry = 1;
4257 else if (flag_fentry < 0)
4259 #if defined(PROFILE_BEFORE_PROLOGUE)
4260 flag_fentry = 1;
4261 #else
4262 flag_fentry = 0;
4263 #endif
4266 /* Save the initial options in case the user uses function-specific options. */
4267 if (main_args_p)
4268 target_option_default_node = target_option_current_node
4269 = build_target_option_node ();
4271 if (TARGET_AVX)
4273 /* When not optimizing for size, enable the vzeroupper optimization for
4274 TARGET_AVX together with -fexpensive-optimizations. */
4275 if (!optimize_size
4276 && flag_expensive_optimizations
4277 && !(target_flags_explicit & MASK_VZEROUPPER))
4278 target_flags |= MASK_VZEROUPPER;
4280 else
4282 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
4283 target_flags &= ~MASK_VZEROUPPER;
4287 /* Return TRUE if VAL is passed in a register in one of the 256-bit AVX modes. */
4289 static bool
4290 function_pass_avx256_p (const_rtx val)
4292 if (!val)
4293 return false;
4295 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
4296 return true;
4298 if (GET_CODE (val) == PARALLEL)
4300 int i;
4301 rtx r;
4303 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
4305 r = XVECEXP (val, 0, i);
4306 if (GET_CODE (r) == EXPR_LIST
4307 && XEXP (r, 0)
4308 && REG_P (XEXP (r, 0))
4309 && (GET_MODE (XEXP (r, 0)) == OImode
4310 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
4311 return true;
4315 return false;
4318 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4320 static void
4321 ix86_option_override (void)
4323 ix86_option_override_internal (true);
4326 /* Update register usage after having seen the compiler flags. */
4328 static void
4329 ix86_conditional_register_usage (void)
4331 int i;
4332 unsigned int j;
4334 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4336 if (fixed_regs[i] > 1)
4337 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
4338 if (call_used_regs[i] > 1)
4339 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
4342 /* The PIC register, if it exists, is fixed. */
4343 j = PIC_OFFSET_TABLE_REGNUM;
4344 if (j != INVALID_REGNUM)
4345 fixed_regs[j] = call_used_regs[j] = 1;
4347 /* The MS_ABI changes the set of call-used registers. */
4348 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
4350 call_used_regs[SI_REG] = 0;
4351 call_used_regs[DI_REG] = 0;
4352 call_used_regs[XMM6_REG] = 0;
4353 call_used_regs[XMM7_REG] = 0;
4354 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4355 call_used_regs[i] = 0;
4358 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
4359 other call-clobbered regs for 64-bit. */
4360 if (TARGET_64BIT)
4362 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4364 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4365 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4366 && call_used_regs[i])
4367 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4370 /* If MMX is disabled, squash the registers. */
4371 if (! TARGET_MMX)
4372 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4373 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4374 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4376 /* If SSE is disabled, squash the registers. */
4377 if (! TARGET_SSE)
4378 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4379 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4380 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4382 /* If the FPU is disabled, squash the registers. */
4383 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4384 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4385 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4386 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4388 /* If 32-bit, squash the 64-bit registers. */
4389 if (! TARGET_64BIT)
4391 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4392 reg_names[i] = "";
4393 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4394 reg_names[i] = "";
4399 /* Save the current options */
4401 static void
4402 ix86_function_specific_save (struct cl_target_option *ptr)
4404 ptr->arch = ix86_arch;
4405 ptr->schedule = ix86_schedule;
4406 ptr->tune = ix86_tune;
4407 ptr->fpmath = ix86_fpmath;
4408 ptr->branch_cost = ix86_branch_cost;
4409 ptr->tune_defaulted = ix86_tune_defaulted;
4410 ptr->arch_specified = ix86_arch_specified;
4411 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4412 ptr->ix86_target_flags_explicit = target_flags_explicit;
4414 /* The fields are char but the variables are not; make sure the
4415 values fit in the fields. */
4416 gcc_assert (ptr->arch == ix86_arch);
4417 gcc_assert (ptr->schedule == ix86_schedule);
4418 gcc_assert (ptr->tune == ix86_tune);
4419 gcc_assert (ptr->fpmath == ix86_fpmath);
4420 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4423 /* Restore the current options */
4425 static void
4426 ix86_function_specific_restore (struct cl_target_option *ptr)
4428 enum processor_type old_tune = ix86_tune;
4429 enum processor_type old_arch = ix86_arch;
4430 unsigned int ix86_arch_mask, ix86_tune_mask;
4431 int i;
4433 ix86_arch = (enum processor_type) ptr->arch;
4434 ix86_schedule = (enum attr_cpu) ptr->schedule;
4435 ix86_tune = (enum processor_type) ptr->tune;
4436 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
4437 ix86_branch_cost = ptr->branch_cost;
4438 ix86_tune_defaulted = ptr->tune_defaulted;
4439 ix86_arch_specified = ptr->arch_specified;
4440 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
4441 target_flags_explicit = ptr->ix86_target_flags_explicit;
4443 /* Recreate the arch feature tests if the arch changed */
4444 if (old_arch != ix86_arch)
4446 ix86_arch_mask = 1u << ix86_arch;
4447 for (i = 0; i < X86_ARCH_LAST; ++i)
4448 ix86_arch_features[i]
4449 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4452 /* Recreate the tune optimization tests */
4453 if (old_tune != ix86_tune)
4455 ix86_tune_mask = 1u << ix86_tune;
4456 for (i = 0; i < X86_TUNE_LAST; ++i)
4457 ix86_tune_features[i]
4458 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4462 /* Print the current options */
4464 static void
4465 ix86_function_specific_print (FILE *file, int indent,
4466 struct cl_target_option *ptr)
4468 char *target_string
4469 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4470 NULL, NULL, NULL, false);
4472 fprintf (file, "%*sarch = %d (%s)\n",
4473 indent, "",
4474 ptr->arch,
4475 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4476 ? cpu_names[ptr->arch]
4477 : "<unknown>"));
4479 fprintf (file, "%*stune = %d (%s)\n",
4480 indent, "",
4481 ptr->tune,
4482 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4483 ? cpu_names[ptr->tune]
4484 : "<unknown>"));
4486 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
4487 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
4488 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
4489 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4491 if (target_string)
4493 fprintf (file, "%*s%s\n", indent, "", target_string);
4494 free (target_string);
4499 /* Inner function to process the attribute((target(...))): take an argument and
4500 set the current options from it. If we have a list, recursively process
4501 each element of the list. */
4503 static bool
4504 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
4506 char *next_optstr;
4507 bool ret = true;
4509 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4510 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4511 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4512 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4514 enum ix86_opt_type
4516 ix86_opt_unknown,
4517 ix86_opt_yes,
4518 ix86_opt_no,
4519 ix86_opt_str,
4520 ix86_opt_isa
4523 static const struct
4525 const char *string;
4526 size_t len;
4527 enum ix86_opt_type type;
4528 int opt;
4529 int mask;
4530 } attrs[] = {
4531 /* isa options */
4532 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4533 IX86_ATTR_ISA ("abm", OPT_mabm),
4534 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4535 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4536 IX86_ATTR_ISA ("aes", OPT_maes),
4537 IX86_ATTR_ISA ("avx", OPT_mavx),
4538 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4539 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4540 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4541 IX86_ATTR_ISA ("sse", OPT_msse),
4542 IX86_ATTR_ISA ("sse2", OPT_msse2),
4543 IX86_ATTR_ISA ("sse3", OPT_msse3),
4544 IX86_ATTR_ISA ("sse4", OPT_msse4),
4545 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4546 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4547 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4548 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4549 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4550 IX86_ATTR_ISA ("xop", OPT_mxop),
4551 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4552 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4553 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4554 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4556 /* string options */
4557 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4558 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
4559 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4561 /* flag options */
4562 IX86_ATTR_YES ("cld",
4563 OPT_mcld,
4564 MASK_CLD),
4566 IX86_ATTR_NO ("fancy-math-387",
4567 OPT_mfancy_math_387,
4568 MASK_NO_FANCY_MATH_387),
4570 IX86_ATTR_YES ("ieee-fp",
4571 OPT_mieee_fp,
4572 MASK_IEEE_FP),
4574 IX86_ATTR_YES ("inline-all-stringops",
4575 OPT_minline_all_stringops,
4576 MASK_INLINE_ALL_STRINGOPS),
4578 IX86_ATTR_YES ("inline-stringops-dynamically",
4579 OPT_minline_stringops_dynamically,
4580 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4582 IX86_ATTR_NO ("align-stringops",
4583 OPT_mno_align_stringops,
4584 MASK_NO_ALIGN_STRINGOPS),
4586 IX86_ATTR_YES ("recip",
4587 OPT_mrecip,
4588 MASK_RECIP),
4592 /* If this is a list, recurse to get the options. */
4593 if (TREE_CODE (args) == TREE_LIST)
4595 bool ret = true;
4597 for (; args; args = TREE_CHAIN (args))
4598 if (TREE_VALUE (args)
4599 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
4600 ret = false;
4602 return ret;
4605 else if (TREE_CODE (args) != STRING_CST)
4606 gcc_unreachable ();
4608 /* Handle multiple arguments separated by commas. */
4609 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4611 while (next_optstr && *next_optstr != '\0')
4613 char *p = next_optstr;
4614 char *orig_p = p;
4615 char *comma = strchr (next_optstr, ',');
4616 const char *opt_string;
4617 size_t len, opt_len;
4618 int opt;
4619 bool opt_set_p;
4620 char ch;
4621 unsigned i;
4622 enum ix86_opt_type type = ix86_opt_unknown;
4623 int mask = 0;
4625 if (comma)
4627 *comma = '\0';
4628 len = comma - next_optstr;
4629 next_optstr = comma + 1;
4631 else
4633 len = strlen (p);
4634 next_optstr = NULL;
4637 /* Recognize no-xxx. */
4638 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4640 opt_set_p = false;
4641 p += 3;
4642 len -= 3;
4644 else
4645 opt_set_p = true;
4647 /* Find the option. */
4648 ch = *p;
4649 opt = N_OPTS;
4650 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4652 type = attrs[i].type;
4653 opt_len = attrs[i].len;
4654 if (ch == attrs[i].string[0]
4655 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4656 && memcmp (p, attrs[i].string, opt_len) == 0)
4658 opt = attrs[i].opt;
4659 mask = attrs[i].mask;
4660 opt_string = attrs[i].string;
4661 break;
4665 /* Process the option. */
4666 if (opt == N_OPTS)
4668 error ("attribute(target(\"%s\")) is unknown", orig_p);
4669 ret = false;
4672 else if (type == ix86_opt_isa)
4673 ix86_handle_option (opt, p, opt_set_p);
4675 else if (type == ix86_opt_yes || type == ix86_opt_no)
4677 if (type == ix86_opt_no)
4678 opt_set_p = !opt_set_p;
4680 if (opt_set_p)
4681 target_flags |= mask;
4682 else
4683 target_flags &= ~mask;
4686 else if (type == ix86_opt_str)
4688 if (p_strings[opt])
4690 error ("option(\"%s\") was already specified", opt_string);
4691 ret = false;
4693 else
4694 p_strings[opt] = xstrdup (p + opt_len);
4697 else
4698 gcc_unreachable ();
4701 return ret;
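   /* Example attribute strings accepted by the parser above (illustrative):
        __attribute__((target("sse4.2,no-avx")))
        __attribute__((target("arch=core2,fpmath=sse")))
      Comma-separated entries, a "no-" prefix for negating flag options,
      and the "arch=", "tune=" and "fpmath=" string options are handled.  */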
4704 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4706 tree
4707 ix86_valid_target_attribute_tree (tree args)
4709 const char *orig_arch_string = ix86_arch_string;
4710 const char *orig_tune_string = ix86_tune_string;
4711 const char *orig_fpmath_string = ix86_fpmath_string;
4712 int orig_tune_defaulted = ix86_tune_defaulted;
4713 int orig_arch_specified = ix86_arch_specified;
4714 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4715 tree t = NULL_TREE;
4716 int i;
4717 struct cl_target_option *def
4718 = TREE_TARGET_OPTION (target_option_default_node);
4720 /* Process each of the options on the chain. */
4721 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4722 return NULL_TREE;
4724 /* If the changed options are different from the default, rerun
4725 ix86_option_override_internal, and then save the options away.
4726 The string options are attribute options, and will be undone
4727 when we copy the save structure. */
4728 if (ix86_isa_flags != def->x_ix86_isa_flags
4729 || target_flags != def->x_target_flags
4730 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4731 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4732 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4734 /* If we are using the default tune= or arch=, undo the string assigned,
4735 and use the default. */
4736 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4737 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4738 else if (!orig_arch_specified)
4739 ix86_arch_string = NULL;
4741 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4742 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4743 else if (orig_tune_defaulted)
4744 ix86_tune_string = NULL;
4746 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4747 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4748 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4749 else if (!TARGET_64BIT && TARGET_SSE)
4750 ix86_fpmath_string = "sse,387";
4752 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4753 ix86_option_override_internal (false);
4755 /* Add any builtin functions with the new isa if any. */
4756 ix86_add_new_builtins (ix86_isa_flags);
4758 /* Save the current options unless we are validating options for
4759 #pragma. */
4760 t = build_target_option_node ();
4762 ix86_arch_string = orig_arch_string;
4763 ix86_tune_string = orig_tune_string;
4764 ix86_fpmath_string = orig_fpmath_string;
4766 /* Free up memory allocated to hold the strings */
4767 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4768 if (option_strings[i])
4769 free (option_strings[i]);
4772 return t;
4775 /* Hook to validate attribute((target("string"))). */
4777 static bool
4778 ix86_valid_target_attribute_p (tree fndecl,
4779 tree ARG_UNUSED (name),
4780 tree args,
4781 int ARG_UNUSED (flags))
4783 struct cl_target_option cur_target;
4784 bool ret = true;
4785 tree old_optimize = build_optimization_node ();
4786 tree new_target, new_optimize;
4787 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4789 /* If the function changed the optimization levels as well as setting target
4790 options, start with the optimizations specified. */
4791 if (func_optimize && func_optimize != old_optimize)
4792 cl_optimization_restore (&global_options,
4793 TREE_OPTIMIZATION (func_optimize));
4795 /* The target attributes may also change some optimization flags, so update
4796 the optimization options if necessary. */
4797 cl_target_option_save (&cur_target, &global_options);
4798 new_target = ix86_valid_target_attribute_tree (args);
4799 new_optimize = build_optimization_node ();
4801 if (!new_target)
4802 ret = false;
4804 else if (fndecl)
4806 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4808 if (old_optimize != new_optimize)
4809 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4812 cl_target_option_restore (&global_options, &cur_target);
4814 if (old_optimize != new_optimize)
4815 cl_optimization_restore (&global_options,
4816 TREE_OPTIMIZATION (old_optimize));
4818 return ret;
4822 /* Hook to determine if one function can safely inline another. */
4824 static bool
4825 ix86_can_inline_p (tree caller, tree callee)
4827 bool ret = false;
4828 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4829 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4831 /* If callee has no option attributes, then it is ok to inline. */
4832 if (!callee_tree)
4833 ret = true;
4835 /* If the caller has no option attributes but the callee does, then it is not
4836 ok to inline. */
4837 else if (!caller_tree)
4838 ret = false;
4840 else
4842 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4843 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4845 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4846 function can inline an SSE2 function, but an SSE2 function can't inline
4847 an SSE4 function. */
4848 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4849 != callee_opts->x_ix86_isa_flags)
4850 ret = false;
4852 /* See if we have the same non-isa options. */
4853 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4854 ret = false;
4856 /* See if arch, tune, etc. are the same. */
4857 else if (caller_opts->arch != callee_opts->arch)
4858 ret = false;
4860 else if (caller_opts->tune != callee_opts->tune)
4861 ret = false;
4863 else if (caller_opts->fpmath != callee_opts->fpmath)
4864 ret = false;
4866 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4867 ret = false;
4869 else
4870 ret = true;
4873 return ret;
4877 /* Remember the last target of ix86_set_current_function. */
4878 static GTY(()) tree ix86_previous_fndecl;
4880 /* Establish appropriate back-end context for processing the function
4881 FNDECL. The argument might be NULL to indicate processing at top
4882 level, outside of any function scope. */
4883 static void
4884 ix86_set_current_function (tree fndecl)
4886 /* Only change the context if the function changes. This hook is called
4887 several times in the course of compiling a function, and we don't want to
4888 slow things down too much or call target_reinit when it isn't safe. */
4889 if (fndecl && fndecl != ix86_previous_fndecl)
4891 tree old_tree = (ix86_previous_fndecl
4892 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4893 : NULL_TREE);
4895 tree new_tree = (fndecl
4896 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4897 : NULL_TREE);
4899 ix86_previous_fndecl = fndecl;
4900 if (old_tree == new_tree)
4903 else if (new_tree)
4905 cl_target_option_restore (&global_options,
4906 TREE_TARGET_OPTION (new_tree));
4907 target_reinit ();
4910 else if (old_tree)
4912 struct cl_target_option *def
4913 = TREE_TARGET_OPTION (target_option_current_node);
4915 cl_target_option_restore (&global_options, def);
4916 target_reinit ();
4922 /* Return true if this goes in large data/bss. */
4924 static bool
4925 ix86_in_large_data_p (tree exp)
4927 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4928 return false;
4930 /* Functions are never large data. */
4931 if (TREE_CODE (exp) == FUNCTION_DECL)
4932 return false;
4934 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4936 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4937 if (strcmp (section, ".ldata") == 0
4938 || strcmp (section, ".lbss") == 0)
4939 return true;
4940 return false;
4942 else
4944 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4946 /* If this is an incomplete type with size 0, then we can't put it
4947 in data because it might be too big when completed. */
4948 if (!size || size > ix86_section_threshold)
4949 return true;
4952 return false;
4955 /* Switch to the appropriate section for output of DECL.
4956 DECL is either a `VAR_DECL' node or a constant of some sort.
4957 RELOC indicates whether forming the initial value of DECL requires
4958 link-time relocations. */
4960 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4961 ATTRIBUTE_UNUSED;
4963 static section *
4964 x86_64_elf_select_section (tree decl, int reloc,
4965 unsigned HOST_WIDE_INT align)
4967 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4968 && ix86_in_large_data_p (decl))
4970 const char *sname = NULL;
4971 unsigned int flags = SECTION_WRITE;
4972 switch (categorize_decl_for_section (decl, reloc))
4974 case SECCAT_DATA:
4975 sname = ".ldata";
4976 break;
4977 case SECCAT_DATA_REL:
4978 sname = ".ldata.rel";
4979 break;
4980 case SECCAT_DATA_REL_LOCAL:
4981 sname = ".ldata.rel.local";
4982 break;
4983 case SECCAT_DATA_REL_RO:
4984 sname = ".ldata.rel.ro";
4985 break;
4986 case SECCAT_DATA_REL_RO_LOCAL:
4987 sname = ".ldata.rel.ro.local";
4988 break;
4989 case SECCAT_BSS:
4990 sname = ".lbss";
4991 flags |= SECTION_BSS;
4992 break;
4993 case SECCAT_RODATA:
4994 case SECCAT_RODATA_MERGE_STR:
4995 case SECCAT_RODATA_MERGE_STR_INIT:
4996 case SECCAT_RODATA_MERGE_CONST:
4997 sname = ".lrodata";
4998 flags = 0;
4999 break;
5000 case SECCAT_SRODATA:
5001 case SECCAT_SDATA:
5002 case SECCAT_SBSS:
5003 gcc_unreachable ();
5004 case SECCAT_TEXT:
5005 case SECCAT_TDATA:
5006 case SECCAT_TBSS:
5007 /* We don't split these for the medium model. Place them into
5008 default sections and hope for the best. */
5009 break;
5011 if (sname)
5013 /* We might get called with string constants, but get_named_section
5014 doesn't like them as they are not DECLs. Also, we need to set
5015 flags in that case. */
5016 if (!DECL_P (decl))
5017 return get_section (sname, flags, NULL);
5018 return get_named_section (decl, sname, reloc);
5021 return default_elf_select_section (decl, reloc, align);
5024 /* Build up a unique section name, expressed as a
5025 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5026 RELOC indicates whether the initial value of EXP requires
5027 link-time relocations. */
5029 static void ATTRIBUTE_UNUSED
5030 x86_64_elf_unique_section (tree decl, int reloc)
5032 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5033 && ix86_in_large_data_p (decl))
5035 const char *prefix = NULL;
5036 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5037 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
5039 switch (categorize_decl_for_section (decl, reloc))
5041 case SECCAT_DATA:
5042 case SECCAT_DATA_REL:
5043 case SECCAT_DATA_REL_LOCAL:
5044 case SECCAT_DATA_REL_RO:
5045 case SECCAT_DATA_REL_RO_LOCAL:
5046 prefix = one_only ? ".ld" : ".ldata";
5047 break;
5048 case SECCAT_BSS:
5049 prefix = one_only ? ".lb" : ".lbss";
5050 break;
5051 case SECCAT_RODATA:
5052 case SECCAT_RODATA_MERGE_STR:
5053 case SECCAT_RODATA_MERGE_STR_INIT:
5054 case SECCAT_RODATA_MERGE_CONST:
5055 prefix = one_only ? ".lr" : ".lrodata";
5056 break;
5057 case SECCAT_SRODATA:
5058 case SECCAT_SDATA:
5059 case SECCAT_SBSS:
5060 gcc_unreachable ();
5061 case SECCAT_TEXT:
5062 case SECCAT_TDATA:
5063 case SECCAT_TBSS:
5064 /* We don't split these for the medium model. Place them into
5065 default sections and hope for the best. */
5066 break;
5068 if (prefix)
5070 const char *name, *linkonce;
5071 char *string;
5073 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5074 name = targetm.strip_name_encoding (name);
5076 /* If we're using one_only, then there needs to be a .gnu.linkonce
5077 prefix to the section name. */
5078 linkonce = one_only ? ".gnu.linkonce" : "";
5080 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5082 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
5083 return;
5086 default_unique_section (decl, reloc);
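   /* Example of the naming scheme above (illustrative): a one-only
      read-only object "foo" in the medium model ends up in a section
      named ".gnu.linkonce.lr.foo".  */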
5089 #ifdef COMMON_ASM_OP
5090 /* This says how to output assembler code to declare an
5091 uninitialized external linkage data object.
5093 For medium-model x86-64 we need to use the .largecomm pseudo-op for
5094 large objects. */
5095 void
5096 x86_elf_aligned_common (FILE *file,
5097 const char *name, unsigned HOST_WIDE_INT size,
5098 int align)
5100 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5101 && size > (unsigned int)ix86_section_threshold)
5102 fputs (".largecomm\t", file);
5103 else
5104 fputs (COMMON_ASM_OP, file);
5105 assemble_name (file, name);
5106 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5107 size, align / BITS_PER_UNIT);
5109 #endif
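   /* Example (illustrative): with -mcmodel=medium, a common object larger
      than ix86_section_threshold (-mlarge-data-threshold) is emitted with
      ".largecomm" rather than the usual COMMON_ASM_OP.  */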
5111 /* Utility function for targets to use in implementing
5112 ASM_OUTPUT_ALIGNED_BSS. */
5114 void
5115 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5116 const char *name, unsigned HOST_WIDE_INT size,
5117 int align)
5119 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5120 && size > (unsigned int)ix86_section_threshold)
5121 switch_to_section (get_named_section (decl, ".lbss", 0));
5122 else
5123 switch_to_section (bss_section);
5124 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5125 #ifdef ASM_DECLARE_OBJECT_NAME
5126 last_assemble_variable_decl = decl;
5127 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5128 #else
5129 /* The standard thing is just to output a label for the object. */
5130 ASM_OUTPUT_LABEL (file, name);
5131 #endif /* ASM_DECLARE_OBJECT_NAME */
5132 ASM_OUTPUT_SKIP (file, size ? size : 1);
5135 static const struct default_options ix86_option_optimization_table[] =
5137 /* Turn off -fschedule-insns by default. It tends to make the
5138 register-shortage problem even worse. */
5139 #ifdef INSN_SCHEDULING
5140 { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
5141 #endif
5143 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
5144 SUBTARGET_OPTIMIZATION_OPTIONS,
5145 #endif
5146 { OPT_LEVELS_NONE, 0, NULL, 0 }
5149 /* Implement TARGET_OPTION_INIT_STRUCT. */
5151 static void
5152 ix86_option_init_struct (struct gcc_options *opts)
5154 if (TARGET_MACHO)
5155 /* The Darwin libraries never set errno, so we might as well
5156 avoid calling them when that's the only reason we would. */
5157 opts->x_flag_errno_math = 0;
5159 opts->x_flag_pcc_struct_return = 2;
5160 opts->x_flag_asynchronous_unwind_tables = 2;
5161 opts->x_flag_vect_cost_model = 1;
5164 /* Decide whether we must probe the stack before any space allocation
5165 on this target. It's essentially TARGET_STACK_PROBE except when
5166 -fstack-check causes the stack to be already probed differently. */
5168 bool
5169 ix86_target_stack_probe (void)
5171 /* Do not probe the stack twice if static stack checking is enabled. */
5172 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5173 return false;
5175 return TARGET_STACK_PROBE;
5178 /* Decide whether we can make a sibling call to a function. DECL is the
5179 declaration of the function being targeted by the call and EXP is the
5180 CALL_EXPR representing the call. */
5182 static bool
5183 ix86_function_ok_for_sibcall (tree decl, tree exp)
5185 tree type, decl_or_type;
5186 rtx a, b;
5188 /* If we are generating position-independent code, we cannot sibcall
5189 optimize any indirect call, or a direct call to a global function,
5190 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5191 if (!TARGET_MACHO
5192 && !TARGET_64BIT
5193 && flag_pic
5194 && (!decl || !targetm.binds_local_p (decl)))
5195 return false;
5197 /* If we need to align the outgoing stack, then sibcalling would
5198 unalign the stack, which may break the called function. */
5199 if (ix86_minimum_incoming_stack_boundary (true)
5200 < PREFERRED_STACK_BOUNDARY)
5201 return false;
5203 if (decl)
5205 decl_or_type = decl;
5206 type = TREE_TYPE (decl);
5208 else
5210 /* We're looking at the CALL_EXPR, we need the type of the function. */
5211 type = CALL_EXPR_FN (exp); /* pointer expression */
5212 type = TREE_TYPE (type); /* pointer type */
5213 type = TREE_TYPE (type); /* function type */
5214 decl_or_type = type;
5217 /* Check that the return value locations are the same. For example,
5218 if we are returning floats on the 80387 register stack, we cannot
5219 make a sibcall from a function that doesn't return a float to a
5220 function that does or, conversely, from a function that does return
5221 a float to a function that doesn't; the necessary stack adjustment
5222 would not be executed. This is also the place we notice
5223 differences in the return value ABI. Note that it is ok for one
5224 of the functions to have void return type as long as the return
5225 value of the other is passed in a register. */
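/* Illustrative case (assumed, not from the original source): if the current
   function returns void but the callee returns a double in %st(0), a sibcall
   would leave that value on the 80387 register stack with nothing to pop it,
   so the STACK_REG_P check below rejects the optimization.  */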
5226 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5227 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5228 cfun->decl, false);
5229 if (STACK_REG_P (a) || STACK_REG_P (b))
5231 if (!rtx_equal_p (a, b))
5232 return false;
5234 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5236 /* Disable sibcall if we need to generate vzeroupper after
5237 callee returns. */
5238 if (TARGET_VZEROUPPER
5239 && cfun->machine->callee_return_avx256_p
5240 && !cfun->machine->caller_return_avx256_p)
5241 return false;
5243 else if (!rtx_equal_p (a, b))
5244 return false;
5246 if (TARGET_64BIT)
5248 /* The SYSV ABI has more call-clobbered registers;
5249 disallow sibcalls from MS to SYSV. */
5250 if (cfun->machine->call_abi == MS_ABI
5251 && ix86_function_type_abi (type) == SYSV_ABI)
5252 return false;
5254 else
5256 /* If this call is indirect, we'll need to be able to use a
5257 call-clobbered register for the address of the target function.
5258 Make sure that all such registers are not used for passing
5259 parameters. Note that DLLIMPORT functions are indirect. */
5260 if (!decl
5261 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5263 if (ix86_function_regparm (type, NULL) >= 3)
5265 /* ??? Need to count the actual number of registers to be used,
5266 not the possible number of registers. Fix later. */
5267 return false;
5272 /* Otherwise okay. That also includes certain types of indirect calls. */
5273 return true;
5276 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5277 and "sseregparm" calling convention attributes;
5278 arguments as in struct attribute_spec.handler. */
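/* Illustrative usage (not part of the original source): these handlers see
   declarations such as

     void __attribute__((fastcall)) f (int a, int b);
     int  __attribute__((regparm (3))) g (int a, int b, int c);

   and the checks below reject incompatible combinations, e.g. fastcall
   together with regparm on the same declaration.  */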
5280 static tree
5281 ix86_handle_cconv_attribute (tree *node, tree name,
5282 tree args,
5283 int flags ATTRIBUTE_UNUSED,
5284 bool *no_add_attrs)
5286 if (TREE_CODE (*node) != FUNCTION_TYPE
5287 && TREE_CODE (*node) != METHOD_TYPE
5288 && TREE_CODE (*node) != FIELD_DECL
5289 && TREE_CODE (*node) != TYPE_DECL)
5291 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5292 name);
5293 *no_add_attrs = true;
5294 return NULL_TREE;
5297 /* Can combine regparm with all attributes but fastcall. */
5298 if (is_attribute_p ("regparm", name))
5300 tree cst;
5302 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5304 error ("fastcall and regparm attributes are not compatible");
5307 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5309 error ("regparam and thiscall attributes are not compatible");
5312 cst = TREE_VALUE (args);
5313 if (TREE_CODE (cst) != INTEGER_CST)
5315 warning (OPT_Wattributes,
5316 "%qE attribute requires an integer constant argument",
5317 name);
5318 *no_add_attrs = true;
5320 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5322 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5323 name, REGPARM_MAX);
5324 *no_add_attrs = true;
5327 return NULL_TREE;
5330 if (TARGET_64BIT)
5332 /* Do not warn when emulating the MS ABI. */
5333 if ((TREE_CODE (*node) != FUNCTION_TYPE
5334 && TREE_CODE (*node) != METHOD_TYPE)
5335 || ix86_function_type_abi (*node) != MS_ABI)
5336 warning (OPT_Wattributes, "%qE attribute ignored",
5337 name);
5338 *no_add_attrs = true;
5339 return NULL_TREE;
5342 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5343 if (is_attribute_p ("fastcall", name))
5345 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5347 error ("fastcall and cdecl attributes are not compatible");
5349 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5351 error ("fastcall and stdcall attributes are not compatible");
5353 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5355 error ("fastcall and regparm attributes are not compatible");
5357 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5359 error ("fastcall and thiscall attributes are not compatible");
5363 /* Can combine stdcall with fastcall (redundant), regparm and
5364 sseregparm. */
5365 else if (is_attribute_p ("stdcall", name))
5367 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5369 error ("stdcall and cdecl attributes are not compatible");
5371 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5373 error ("stdcall and fastcall attributes are not compatible");
5375 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5377 error ("stdcall and thiscall attributes are not compatible");
5381 /* Can combine cdecl with regparm and sseregparm. */
5382 else if (is_attribute_p ("cdecl", name))
5384 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5386 error ("stdcall and cdecl attributes are not compatible");
5388 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5390 error ("fastcall and cdecl attributes are not compatible");
5392 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5394 error ("cdecl and thiscall attributes are not compatible");
5397 else if (is_attribute_p ("thiscall", name))
5399 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5400 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5401 name);
5402 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5404 error ("stdcall and thiscall attributes are not compatible");
5406 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5408 error ("fastcall and thiscall attributes are not compatible");
5410 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5412 error ("cdecl and thiscall attributes are not compatible");
5416 /* Can combine sseregparm with all attributes. */
5418 return NULL_TREE;
5421 /* Return 0 if the attributes for two types are incompatible, 1 if they
5422 are compatible, and 2 if they are nearly compatible (which causes a
5423 warning to be generated). */
5425 static int
5426 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5428 /* Check for mismatch of non-default calling convention. */
5429 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
5431 if (TREE_CODE (type1) != FUNCTION_TYPE
5432 && TREE_CODE (type1) != METHOD_TYPE)
5433 return 1;
5435 /* Check for mismatched fastcall/regparm types. */
5436 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
5437 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
5438 || (ix86_function_regparm (type1, NULL)
5439 != ix86_function_regparm (type2, NULL)))
5440 return 0;
5442 /* Check for mismatched sseregparm types. */
5443 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
5444 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
5445 return 0;
5447 /* Check for mismatched thiscall types. */
5448 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
5449 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
5450 return 0;
5452 /* Check for mismatched return types (cdecl vs stdcall). */
5453 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
5454 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
5455 return 0;
5457 return 1;
5460 /* Return the regparm value for a function with the indicated TYPE and DECL.
5461 DECL may be NULL when calling function indirectly
5462 or considering a libcall. */
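/* Illustrative sketch (assumed example, not from this file): in 32-bit code
   `int __attribute__((regparm (2))) f (int a, int b);' receives A in %eax
   and B in %edx; fastcall instead uses %ecx and %edx (hence the value 2
   returned below), and thiscall passes only the `this' pointer in %ecx
   (hence 1).  */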
5464 static int
5465 ix86_function_regparm (const_tree type, const_tree decl)
5467 tree attr;
5468 int regparm;
5470 if (TARGET_64BIT)
5471 return (ix86_function_type_abi (type) == SYSV_ABI
5472 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5474 regparm = ix86_regparm;
5475 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5476 if (attr)
5478 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5479 return regparm;
5482 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
5483 return 2;
5485 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
5486 return 1;
5488 /* Use register calling convention for local functions when possible. */
5489 if (decl
5490 && TREE_CODE (decl) == FUNCTION_DECL
5491 && optimize
5492 && !(profile_flag && !flag_fentry))
5494 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5495 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5496 if (i && i->local && i->can_change_signature)
5498 int local_regparm, globals = 0, regno;
5500 /* Make sure no regparm register is taken by a
5501 fixed register variable. */
5502 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5503 if (fixed_regs[local_regparm])
5504 break;
5506 /* We don't want to use regparm(3) for nested functions as
5507 these use a static chain pointer in the third argument. */
5508 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5509 local_regparm = 2;
5511 /* In 32-bit mode save a register for the split stack. */
5512 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5513 local_regparm = 2;
5515 /* Each fixed register usage increases register pressure,
5516 so fewer registers should be used for argument passing.
5517 This functionality can be overridden by an explicit
5518 regparm value. */
5519 for (regno = 0; regno <= DI_REG; regno++)
5520 if (fixed_regs[regno])
5521 globals++;
5523 local_regparm
5524 = globals < local_regparm ? local_regparm - globals : 0;
5526 if (local_regparm > regparm)
5527 regparm = local_regparm;
5531 return regparm;
5534 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5535 DFmode (2) arguments in SSE registers for a function with the
5536 indicated TYPE and DECL. DECL may be NULL when calling function
5537 indirectly or considering a libcall. Otherwise return 0. */
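/* Illustrative example (assumed, not from the original source): with
   `float __attribute__((sseregparm)) f (float x);' in 32-bit code, X is
   passed in %xmm0 instead of on the stack; without SSE enabled the
   attribute is diagnosed below.  */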
5539 static int
5540 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5542 gcc_assert (!TARGET_64BIT);
5544 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5545 by the sseregparm attribute. */
5546 if (TARGET_SSEREGPARM
5547 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5549 if (!TARGET_SSE)
5551 if (warn)
5553 if (decl)
5554 error ("calling %qD with attribute sseregparm without "
5555 "SSE/SSE2 enabled", decl);
5556 else
5557 error ("calling %qT with attribute sseregparm without "
5558 "SSE/SSE2 enabled", type);
5560 return 0;
5563 return 2;
5566 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5567 (and DFmode for SSE2) arguments in SSE registers. */
5568 if (decl && TARGET_SSE_MATH && optimize
5569 && !(profile_flag && !flag_fentry))
5571 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5572 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5573 if (i && i->local && i->can_change_signature)
5574 return TARGET_SSE2 ? 2 : 1;
5577 return 0;
5580 /* Return true if EAX is live at the start of the function. Used by
5581 ix86_expand_prologue to determine if we need special help before
5582 calling allocate_stack_worker. */
5584 static bool
5585 ix86_eax_live_at_start_p (void)
5587 /* Cheat. Don't bother working forward from ix86_function_regparm
5588 to the function type to whether an actual argument is located in
5589 eax. Instead just look at cfg info, which is still close enough
5590 to correct at this point. This gives false positives for broken
5591 functions that might use uninitialized data that happens to be
5592 allocated in eax, but who cares? */
5593 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5596 static bool
5597 ix86_keep_aggregate_return_pointer (tree fntype)
5599 tree attr;
5601 attr = lookup_attribute ("callee_pop_aggregate_return",
5602 TYPE_ATTRIBUTES (fntype));
5603 if (attr)
5604 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5606 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5609 /* Value is the number of bytes of arguments automatically
5610 popped when returning from a subroutine call.
5611 FUNDECL is the declaration node of the function (as a tree),
5612 FUNTYPE is the data type of the function (as a tree),
5613 or for a library call it is an identifier node for the subroutine name.
5614 SIZE is the number of bytes of arguments passed on the stack.
5616 On the 80386, the RTD insn may be used to pop them if the number
5617 of args is fixed, but if the number is variable then the caller
5618 must pop them all. RTD can't be used for library calls now
5619 because the library is compiled with the Unix compiler.
5620 Use of RTD is a selectable option, since it is incompatible with
5621 standard Unix calling sequences. If the option is not selected,
5622 the caller must always pop the args.
5624 The attribute stdcall is equivalent to RTD on a per module basis. */
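/* Illustrative example (assumed, not from the original source): a 32-bit
   `void __attribute__((stdcall)) f (int a, int b);' pops its 8 bytes of
   stack arguments itself (via `ret $8'), so this hook returns SIZE for it,
   whereas a cdecl function returns 0 and the caller adjusts the stack.  */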
5626 static int
5627 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5629 int rtd;
5631 /* None of the 64-bit ABIs pop arguments. */
5632 if (TARGET_64BIT)
5633 return 0;
5635 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
5637 /* Cdecl functions override -mrtd, and never pop the stack. */
5638 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
5640 /* Stdcall and fastcall functions will pop the stack if not
5641 variable args. */
5642 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
5643 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
5644 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
5645 rtd = 1;
5647 if (rtd && ! stdarg_p (funtype))
5648 return size;
5651 /* Lose any fake structure return argument if it is passed on the stack. */
5652 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5653 && !ix86_keep_aggregate_return_pointer (funtype))
5655 int nregs = ix86_function_regparm (funtype, fundecl);
5656 if (nregs == 0)
5657 return GET_MODE_SIZE (Pmode);
5660 return 0;
5663 /* Argument support functions. */
5665 /* Return true when register may be used to pass function parameters. */
5666 bool
5667 ix86_function_arg_regno_p (int regno)
5669 int i;
5670 const int *parm_regs;
5672 if (!TARGET_64BIT)
5674 if (TARGET_MACHO)
5675 return (regno < REGPARM_MAX
5676 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5677 else
5678 return (regno < REGPARM_MAX
5679 || (TARGET_MMX && MMX_REGNO_P (regno)
5680 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5681 || (TARGET_SSE && SSE_REGNO_P (regno)
5682 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5685 if (TARGET_MACHO)
5687 if (SSE_REGNO_P (regno) && TARGET_SSE)
5688 return true;
5690 else
5692 if (TARGET_SSE && SSE_REGNO_P (regno)
5693 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5694 return true;
5697 /* TODO: The function should depend on current function ABI but
5698 builtins.c would need updating then. Therefore we use the
5699 default ABI. */
5701 /* RAX is used as hidden argument to va_arg functions. */
5702 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5703 return true;
5705 if (ix86_abi == MS_ABI)
5706 parm_regs = x86_64_ms_abi_int_parameter_registers;
5707 else
5708 parm_regs = x86_64_int_parameter_registers;
5709 for (i = 0; i < (ix86_abi == MS_ABI
5710 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5711 if (regno == parm_regs[i])
5712 return true;
5713 return false;
5716 /* Return true if we do not know how to pass TYPE solely in registers. */
5718 static bool
5719 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5721 if (must_pass_in_stack_var_size_or_pad (mode, type))
5722 return true;
5724 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5725 The layout_type routine is crafty and tries to trick us into passing
5726 currently unsupported vector types on the stack by using TImode. */
5727 return (!TARGET_64BIT && mode == TImode
5728 && type && TREE_CODE (type) != VECTOR_TYPE);
5731 /* Return the size, in bytes, of the area reserved for arguments passed
5732 in registers for the function represented by FNDECL, depending on the
5733 ABI used. */
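/* Note (added for clarity): the 32 bytes returned for MS_ABI correspond to
   the four 8-byte register "home" slots the Windows x64 convention requires
   the caller to reserve for RCX, RDX, R8 and R9.  */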
5735 ix86_reg_parm_stack_space (const_tree fndecl)
5737 enum calling_abi call_abi = SYSV_ABI;
5738 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5739 call_abi = ix86_function_abi (fndecl);
5740 else
5741 call_abi = ix86_function_type_abi (fndecl);
5742 if (call_abi == MS_ABI)
5743 return 32;
5744 return 0;
5747 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5748 call ABI used. */
5749 enum calling_abi
5750 ix86_function_type_abi (const_tree fntype)
5752 if (TARGET_64BIT && fntype != NULL)
5754 enum calling_abi abi = ix86_abi;
5755 if (abi == SYSV_ABI)
5757 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5758 abi = MS_ABI;
5760 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5761 abi = SYSV_ABI;
5762 return abi;
5764 return ix86_abi;
5767 static bool
5768 ix86_function_ms_hook_prologue (const_tree fn)
5770 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5772 if (decl_function_context (fn) != NULL_TREE)
5773 error_at (DECL_SOURCE_LOCATION (fn),
5774 "ms_hook_prologue is not compatible with nested function");
5775 else
5776 return true;
5778 return false;
5781 static enum calling_abi
5782 ix86_function_abi (const_tree fndecl)
5784 if (! fndecl)
5785 return ix86_abi;
5786 return ix86_function_type_abi (TREE_TYPE (fndecl));
5789 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
5790 call ABI used. */
5791 enum calling_abi
5792 ix86_cfun_abi (void)
5794 if (! cfun || ! TARGET_64BIT)
5795 return ix86_abi;
5796 return cfun->machine->call_abi;
5799 /* Write the extra assembler code needed to declare a function properly. */
5801 void
5802 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5803 tree decl)
5805 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5807 if (is_ms_hook)
5809 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5810 unsigned int filler_cc = 0xcccccccc;
5812 for (i = 0; i < filler_count; i += 4)
5813 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5816 #ifdef SUBTARGET_ASM_UNWIND_INIT
5817 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5818 #endif
5820 ASM_OUTPUT_LABEL (asm_out_file, fname);
5822 /* Output magic byte marker, if hot-patch attribute is set. */
5823 if (is_ms_hook)
5825 if (TARGET_64BIT)
5827 /* leaq [%rsp + 0], %rsp */
5828 asm_fprintf (asm_out_file, ASM_BYTE
5829 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5831 else
5833 /* movl.s %edi, %edi
5834 push %ebp
5835 movl.s %esp, %ebp */
5836 asm_fprintf (asm_out_file, ASM_BYTE
5837 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5842 /* regclass.c */
5843 extern void init_regs (void);
5845 /* Implementation of the call ABI switching target hook. Set up the call
5846 register sets specific to FNDECL. See also
5847 ix86_conditional_register_usage for more details. */
5848 void
5849 ix86_call_abi_override (const_tree fndecl)
5851 if (fndecl == NULL_TREE)
5852 cfun->machine->call_abi = ix86_abi;
5853 else
5854 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5857 /* MS and SYSV ABIs have different sets of call-used registers. Avoid expensive
5858 re-initialization of init_regs each time we switch function context since
5859 this is needed only during RTL expansion. */
5860 static void
5861 ix86_maybe_switch_abi (void)
5863 if (TARGET_64BIT &&
5864 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5865 reinit_regs ();
5868 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5869 for a call to a function whose data type is FNTYPE.
5870 For a library call, FNTYPE is 0. */
5872 void
5873 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5874 tree fntype, /* tree ptr for function decl */
5875 rtx libname, /* SYMBOL_REF of library name or 0 */
5876 tree fndecl,
5877 int caller)
5879 struct cgraph_local_info *i;
5880 tree fnret_type;
5882 memset (cum, 0, sizeof (*cum));
5884 /* Initialize for the current callee. */
5885 if (caller)
5887 cfun->machine->callee_pass_avx256_p = false;
5888 cfun->machine->callee_return_avx256_p = false;
5891 if (fndecl)
5893 i = cgraph_local_info (fndecl);
5894 cum->call_abi = ix86_function_abi (fndecl);
5895 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5897 else
5899 i = NULL;
5900 cum->call_abi = ix86_function_type_abi (fntype);
5901 if (fntype)
5902 fnret_type = TREE_TYPE (fntype);
5903 else
5904 fnret_type = NULL;
5907 if (TARGET_VZEROUPPER && fnret_type)
5909 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5910 false);
5911 if (function_pass_avx256_p (fnret_value))
5913 /* The return value of this function uses 256bit AVX modes. */
5914 if (caller)
5915 cfun->machine->callee_return_avx256_p = true;
5916 else
5917 cfun->machine->caller_return_avx256_p = true;
5921 cum->caller = caller;
5923 /* Set up the number of registers to use for passing arguments. */
5925 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5926 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5927 "or subtarget optimization implying it");
5928 cum->nregs = ix86_regparm;
5929 if (TARGET_64BIT)
5931 cum->nregs = (cum->call_abi == SYSV_ABI
5932 ? X86_64_REGPARM_MAX
5933 : X86_64_MS_REGPARM_MAX);
5935 if (TARGET_SSE)
5937 cum->sse_nregs = SSE_REGPARM_MAX;
5938 if (TARGET_64BIT)
5940 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5941 ? X86_64_SSE_REGPARM_MAX
5942 : X86_64_MS_SSE_REGPARM_MAX);
5945 if (TARGET_MMX)
5946 cum->mmx_nregs = MMX_REGPARM_MAX;
5947 cum->warn_avx = true;
5948 cum->warn_sse = true;
5949 cum->warn_mmx = true;
5951 /* Because the type might mismatch between caller and callee, we need to
5952 use the actual type of the function for local calls.
5953 FIXME: cgraph_analyze can be told to actually record if function uses
5954 va_start so for local functions maybe_vaarg can be made more
5955 aggressive, helping K&R code.
5956 FIXME: once the type system is fixed, we won't need this code anymore. */
5957 if (i && i->local && i->can_change_signature)
5958 fntype = TREE_TYPE (fndecl);
5959 cum->maybe_vaarg = (fntype
5960 ? (!prototype_p (fntype) || stdarg_p (fntype))
5961 : !libname);
5963 if (!TARGET_64BIT)
5965 /* If there are variable arguments, then we won't pass anything
5966 in registers in 32-bit mode. */
5967 if (stdarg_p (fntype))
5969 cum->nregs = 0;
5970 cum->sse_nregs = 0;
5971 cum->mmx_nregs = 0;
5972 cum->warn_avx = 0;
5973 cum->warn_sse = 0;
5974 cum->warn_mmx = 0;
5975 return;
5978 /* Use ecx and edx registers if function has fastcall attribute,
5979 else look for regparm information. */
5980 if (fntype)
5982 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5984 cum->nregs = 1;
5985 cum->fastcall = 1; /* Same first register as in fastcall. */
5987 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5989 cum->nregs = 2;
5990 cum->fastcall = 1;
5992 else
5993 cum->nregs = ix86_function_regparm (fntype, fndecl);
5996 /* Set up the number of SSE registers used for passing SFmode
5997 and DFmode arguments. Warn for mismatching ABI. */
5998 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6002 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6003 But in the case of vector types, it is some vector mode.
6005 When we have only some of our vector isa extensions enabled, then there
6006 are some modes for which vector_mode_supported_p is false. For these
6007 modes, the generic vector support in gcc will choose some non-vector mode
6008 in order to implement the type. By computing the natural mode, we'll
6009 select the proper ABI location for the operand and not depend on whatever
6010 the middle-end decides to do with these vector types.
6012 The middle-end can't deal with vector types larger than 16 bytes. In this
6013 case, we return the original mode and warn about the ABI change if CUM
6014 isn't NULL. */
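/* Illustrative example (assumed, not from the original source): a
   `float __attribute__((vector_size (16)))' type gets natural mode V4SFmode
   even when SSE is not enabled, whereas a 32-byte vector without AVX keeps
   TYPE_MODE and triggers the "changes the ABI" warning below.  */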
6016 static enum machine_mode
6017 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
6019 enum machine_mode mode = TYPE_MODE (type);
6021 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6023 HOST_WIDE_INT size = int_size_in_bytes (type);
6024 if ((size == 8 || size == 16 || size == 32)
6025 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6026 && TYPE_VECTOR_SUBPARTS (type) > 1)
6028 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6030 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6031 mode = MIN_MODE_VECTOR_FLOAT;
6032 else
6033 mode = MIN_MODE_VECTOR_INT;
6035 /* Get the mode which has this inner mode and number of units. */
6036 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6037 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6038 && GET_MODE_INNER (mode) == innermode)
6040 if (size == 32 && !TARGET_AVX)
6042 static bool warnedavx;
6044 if (cum
6045 && !warnedavx
6046 && cum->warn_avx)
6048 warnedavx = true;
6049 warning (0, "AVX vector argument without AVX "
6050 "enabled changes the ABI");
6052 return TYPE_MODE (type);
6054 else
6055 return mode;
6058 gcc_unreachable ();
6062 return mode;
6065 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6066 this may not agree with the mode that the type system has chosen for the
6067 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6068 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6070 static rtx
6071 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6072 unsigned int regno)
6074 rtx tmp;
6076 if (orig_mode != BLKmode)
6077 tmp = gen_rtx_REG (orig_mode, regno);
6078 else
6080 tmp = gen_rtx_REG (mode, regno);
6081 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6082 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6085 return tmp;
6088 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
6089 goal of this code is to classify each eightbyte of an incoming argument by
6090 register class and assign registers accordingly. */
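/* Worked example (assumed, not from the original source): under the SysV
   x86-64 ABI a struct such as

     struct S { double d; long l; };

   occupies two eightbytes; the first is classified X86_64_SSEDF_CLASS and
   the second X86_64_INTEGER_CLASS, so S is passed in one SSE register and
   one integer register (e.g. %xmm0 and %rdi for a first argument).  */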
6092 /* Return the union class of CLASS1 and CLASS2.
6093 See the x86-64 PS ABI for details. */
6095 static enum x86_64_reg_class
6096 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6098 /* Rule #1: If both classes are equal, this is the resulting class. */
6099 if (class1 == class2)
6100 return class1;
6102 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6103 the other class. */
6104 if (class1 == X86_64_NO_CLASS)
6105 return class2;
6106 if (class2 == X86_64_NO_CLASS)
6107 return class1;
6109 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6110 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6111 return X86_64_MEMORY_CLASS;
6113 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6114 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6115 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6116 return X86_64_INTEGERSI_CLASS;
6117 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6118 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6119 return X86_64_INTEGER_CLASS;
6121 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6122 MEMORY is used. */
6123 if (class1 == X86_64_X87_CLASS
6124 || class1 == X86_64_X87UP_CLASS
6125 || class1 == X86_64_COMPLEX_X87_CLASS
6126 || class2 == X86_64_X87_CLASS
6127 || class2 == X86_64_X87UP_CLASS
6128 || class2 == X86_64_COMPLEX_X87_CLASS)
6129 return X86_64_MEMORY_CLASS;
6131 /* Rule #6: Otherwise class SSE is used. */
6132 return X86_64_SSE_CLASS;
6135 /* Classify the argument of type TYPE and mode MODE.
6136 CLASSES will be filled by the register class used to pass each word
6137 of the operand. The number of words is returned. In case the parameter
6138 should be passed in memory, 0 is returned. As a special case for zero
6139 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6141 BIT_OFFSET is used internally for handling records and specifies the
6142 offset in bits modulo 256 to avoid overflow cases.
6144 See the x86-64 PS ABI for details. */
6147 static int
6148 classify_argument (enum machine_mode mode, const_tree type,
6149 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6151 HOST_WIDE_INT bytes =
6152 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6153 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6155 /* Variable sized entities are always passed/returned in memory. */
6156 if (bytes < 0)
6157 return 0;
6159 if (mode != VOIDmode
6160 && targetm.calls.must_pass_in_stack (mode, type))
6161 return 0;
6163 if (type && AGGREGATE_TYPE_P (type))
6165 int i;
6166 tree field;
6167 enum x86_64_reg_class subclasses[MAX_CLASSES];
6169 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6170 if (bytes > 32)
6171 return 0;
6173 for (i = 0; i < words; i++)
6174 classes[i] = X86_64_NO_CLASS;
6176 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
6177 signal the memory class, so handle it as a special case. */
6178 if (!words)
6180 classes[0] = X86_64_NO_CLASS;
6181 return 1;
6184 /* Classify each field of record and merge classes. */
6185 switch (TREE_CODE (type))
6187 case RECORD_TYPE:
6188 /* And now merge the fields of structure. */
6189 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6191 if (TREE_CODE (field) == FIELD_DECL)
6193 int num;
6195 if (TREE_TYPE (field) == error_mark_node)
6196 continue;
6198 /* Bitfields are always classified as integer. Handle them
6199 early, since later code would consider them to be
6200 misaligned integers. */
6201 if (DECL_BIT_FIELD (field))
6203 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6204 i < ((int_bit_position (field) + (bit_offset % 64))
6205 + tree_low_cst (DECL_SIZE (field), 0)
6206 + 63) / 8 / 8; i++)
6207 classes[i] =
6208 merge_classes (X86_64_INTEGER_CLASS,
6209 classes[i]);
6211 else
6213 int pos;
6215 type = TREE_TYPE (field);
6217 /* Flexible array member is ignored. */
6218 if (TYPE_MODE (type) == BLKmode
6219 && TREE_CODE (type) == ARRAY_TYPE
6220 && TYPE_SIZE (type) == NULL_TREE
6221 && TYPE_DOMAIN (type) != NULL_TREE
6222 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6223 == NULL_TREE))
6225 static bool warned;
6227 if (!warned && warn_psabi)
6229 warned = true;
6230 inform (input_location,
6231 "the ABI of passing struct with"
6232 " a flexible array member has"
6233 " changed in GCC 4.4");
6235 continue;
6237 num = classify_argument (TYPE_MODE (type), type,
6238 subclasses,
6239 (int_bit_position (field)
6240 + bit_offset) % 256);
6241 if (!num)
6242 return 0;
6243 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6244 for (i = 0; i < num && (i + pos) < words; i++)
6245 classes[i + pos] =
6246 merge_classes (subclasses[i], classes[i + pos]);
6250 break;
6252 case ARRAY_TYPE:
6253 /* Arrays are handled as small records. */
6255 int num;
6256 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6257 TREE_TYPE (type), subclasses, bit_offset);
6258 if (!num)
6259 return 0;
6261 /* The partial classes are now full classes. */
6262 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6263 subclasses[0] = X86_64_SSE_CLASS;
6264 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6265 && !((bit_offset % 64) == 0 && bytes == 4))
6266 subclasses[0] = X86_64_INTEGER_CLASS;
6268 for (i = 0; i < words; i++)
6269 classes[i] = subclasses[i % num];
6271 break;
6273 case UNION_TYPE:
6274 case QUAL_UNION_TYPE:
6275 /* Unions are similar to RECORD_TYPE but offset is always 0. */
6277 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6279 if (TREE_CODE (field) == FIELD_DECL)
6281 int num;
6283 if (TREE_TYPE (field) == error_mark_node)
6284 continue;
6286 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6287 TREE_TYPE (field), subclasses,
6288 bit_offset);
6289 if (!num)
6290 return 0;
6291 for (i = 0; i < num; i++)
6292 classes[i] = merge_classes (subclasses[i], classes[i]);
6295 break;
6297 default:
6298 gcc_unreachable ();
6301 if (words > 2)
6303 /* When size > 16 bytes, if the first one isn't
6304 X86_64_SSE_CLASS or any other ones aren't
6305 X86_64_SSEUP_CLASS, everything should be passed in
6306 memory. */
6307 if (classes[0] != X86_64_SSE_CLASS)
6308 return 0;
6310 for (i = 1; i < words; i++)
6311 if (classes[i] != X86_64_SSEUP_CLASS)
6312 return 0;
6315 /* Final merger cleanup. */
6316 for (i = 0; i < words; i++)
6318 /* If one class is MEMORY, everything should be passed in
6319 memory. */
6320 if (classes[i] == X86_64_MEMORY_CLASS)
6321 return 0;
6323 /* The X86_64_SSEUP_CLASS should be always preceded by
6324 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6325 if (classes[i] == X86_64_SSEUP_CLASS
6326 && classes[i - 1] != X86_64_SSE_CLASS
6327 && classes[i - 1] != X86_64_SSEUP_CLASS)
6329 /* The first one should never be X86_64_SSEUP_CLASS. */
6330 gcc_assert (i != 0);
6331 classes[i] = X86_64_SSE_CLASS;
6334 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6335 everything should be passed in memory. */
6336 if (classes[i] == X86_64_X87UP_CLASS
6337 && (classes[i - 1] != X86_64_X87_CLASS))
6339 static bool warned;
6341 /* The first one should never be X86_64_X87UP_CLASS. */
6342 gcc_assert (i != 0);
6343 if (!warned && warn_psabi)
6345 warned = true;
6346 inform (input_location,
6347 "the ABI of passing union with long double"
6348 " has changed in GCC 4.4");
6350 return 0;
6353 return words;
6356 /* Compute alignment needed. We align all types to natural boundaries with
6357 the exception of XFmode, which is aligned to 64 bits. */
6358 if (mode != VOIDmode && mode != BLKmode)
6360 int mode_alignment = GET_MODE_BITSIZE (mode);
6362 if (mode == XFmode)
6363 mode_alignment = 128;
6364 else if (mode == XCmode)
6365 mode_alignment = 256;
6366 if (COMPLEX_MODE_P (mode))
6367 mode_alignment /= 2;
6368 /* Misaligned fields are always returned in memory. */
6369 if (bit_offset % mode_alignment)
6370 return 0;
6373 /* for V1xx modes, just use the base mode */
6374 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6375 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6376 mode = GET_MODE_INNER (mode);
6378 /* Classification of atomic types. */
6379 switch (mode)
6381 case SDmode:
6382 case DDmode:
6383 classes[0] = X86_64_SSE_CLASS;
6384 return 1;
6385 case TDmode:
6386 classes[0] = X86_64_SSE_CLASS;
6387 classes[1] = X86_64_SSEUP_CLASS;
6388 return 2;
6389 case DImode:
6390 case SImode:
6391 case HImode:
6392 case QImode:
6393 case CSImode:
6394 case CHImode:
6395 case CQImode:
6397 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
6399 if (size <= 32)
6401 classes[0] = X86_64_INTEGERSI_CLASS;
6402 return 1;
6404 else if (size <= 64)
6406 classes[0] = X86_64_INTEGER_CLASS;
6407 return 1;
6409 else if (size <= 64+32)
6411 classes[0] = X86_64_INTEGER_CLASS;
6412 classes[1] = X86_64_INTEGERSI_CLASS;
6413 return 2;
6415 else if (size <= 64+64)
6417 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6418 return 2;
6420 else
6421 gcc_unreachable ();
6423 case CDImode:
6424 case TImode:
6425 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6426 return 2;
6427 case COImode:
6428 case OImode:
6429 /* OImode shouldn't be used directly. */
6430 gcc_unreachable ();
6431 case CTImode:
6432 return 0;
6433 case SFmode:
6434 if (!(bit_offset % 64))
6435 classes[0] = X86_64_SSESF_CLASS;
6436 else
6437 classes[0] = X86_64_SSE_CLASS;
6438 return 1;
6439 case DFmode:
6440 classes[0] = X86_64_SSEDF_CLASS;
6441 return 1;
6442 case XFmode:
6443 classes[0] = X86_64_X87_CLASS;
6444 classes[1] = X86_64_X87UP_CLASS;
6445 return 2;
6446 case TFmode:
6447 classes[0] = X86_64_SSE_CLASS;
6448 classes[1] = X86_64_SSEUP_CLASS;
6449 return 2;
6450 case SCmode:
6451 classes[0] = X86_64_SSE_CLASS;
6452 if (!(bit_offset % 64))
6453 return 1;
6454 else
6456 static bool warned;
6458 if (!warned && warn_psabi)
6460 warned = true;
6461 inform (input_location,
6462 "the ABI of passing structure with complex float"
6463 " member has changed in GCC 4.4");
6465 classes[1] = X86_64_SSESF_CLASS;
6466 return 2;
6468 case DCmode:
6469 classes[0] = X86_64_SSEDF_CLASS;
6470 classes[1] = X86_64_SSEDF_CLASS;
6471 return 2;
6472 case XCmode:
6473 classes[0] = X86_64_COMPLEX_X87_CLASS;
6474 return 1;
6475 case TCmode:
6476 /* This mode is larger than 16 bytes. */
6477 return 0;
6478 case V8SFmode:
6479 case V8SImode:
6480 case V32QImode:
6481 case V16HImode:
6482 case V4DFmode:
6483 case V4DImode:
6484 classes[0] = X86_64_SSE_CLASS;
6485 classes[1] = X86_64_SSEUP_CLASS;
6486 classes[2] = X86_64_SSEUP_CLASS;
6487 classes[3] = X86_64_SSEUP_CLASS;
6488 return 4;
6489 case V4SFmode:
6490 case V4SImode:
6491 case V16QImode:
6492 case V8HImode:
6493 case V2DFmode:
6494 case V2DImode:
6495 classes[0] = X86_64_SSE_CLASS;
6496 classes[1] = X86_64_SSEUP_CLASS;
6497 return 2;
6498 case V1TImode:
6499 case V1DImode:
6500 case V2SFmode:
6501 case V2SImode:
6502 case V4HImode:
6503 case V8QImode:
6504 classes[0] = X86_64_SSE_CLASS;
6505 return 1;
6506 case BLKmode:
6507 case VOIDmode:
6508 return 0;
6509 default:
6510 gcc_assert (VECTOR_MODE_P (mode));
6512 if (bytes > 16)
6513 return 0;
6515 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6517 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6518 classes[0] = X86_64_INTEGERSI_CLASS;
6519 else
6520 classes[0] = X86_64_INTEGER_CLASS;
6521 classes[1] = X86_64_INTEGER_CLASS;
6522 return 1 + (bytes > 8);
6526 /* Examine the argument and set the number of registers required in each
6527 class. Return 0 iff the parameter should be passed in memory. */
6528 static int
6529 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6530 int *int_nregs, int *sse_nregs)
6532 enum x86_64_reg_class regclass[MAX_CLASSES];
6533 int n = classify_argument (mode, type, regclass, 0);
6535 *int_nregs = 0;
6536 *sse_nregs = 0;
6537 if (!n)
6538 return 0;
6539 for (n--; n >= 0; n--)
6540 switch (regclass[n])
6542 case X86_64_INTEGER_CLASS:
6543 case X86_64_INTEGERSI_CLASS:
6544 (*int_nregs)++;
6545 break;
6546 case X86_64_SSE_CLASS:
6547 case X86_64_SSESF_CLASS:
6548 case X86_64_SSEDF_CLASS:
6549 (*sse_nregs)++;
6550 break;
6551 case X86_64_NO_CLASS:
6552 case X86_64_SSEUP_CLASS:
6553 break;
6554 case X86_64_X87_CLASS:
6555 case X86_64_X87UP_CLASS:
6556 if (!in_return)
6557 return 0;
6558 break;
6559 case X86_64_COMPLEX_X87_CLASS:
6560 return in_return ? 2 : 0;
6561 case X86_64_MEMORY_CLASS:
6562 gcc_unreachable ();
6564 return 1;
6567 /* Construct container for the argument used by GCC interface. See
6568 FUNCTION_ARG for the detailed description. */
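/* Sketch of the result (assumed example, not from the original source): for
   a struct { double d; long l; } argument classified as SSEDF + INTEGER,
   this builds a PARALLEL with two EXPR_LIST entries, roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   i.e. the DFmode half at byte offset 0 in an SSE register and the DImode
   half at byte offset 8 in an integer register.  */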
6570 static rtx
6571 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6572 const_tree type, int in_return, int nintregs, int nsseregs,
6573 const int *intreg, int sse_regno)
6575 /* The following variables hold the static issued_error state. */
6576 static bool issued_sse_arg_error;
6577 static bool issued_sse_ret_error;
6578 static bool issued_x87_ret_error;
6580 enum machine_mode tmpmode;
6581 int bytes =
6582 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6583 enum x86_64_reg_class regclass[MAX_CLASSES];
6584 int n;
6585 int i;
6586 int nexps = 0;
6587 int needed_sseregs, needed_intregs;
6588 rtx exp[MAX_CLASSES];
6589 rtx ret;
6591 n = classify_argument (mode, type, regclass, 0);
6592 if (!n)
6593 return NULL;
6594 if (!examine_argument (mode, type, in_return, &needed_intregs,
6595 &needed_sseregs))
6596 return NULL;
6597 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6598 return NULL;
6600 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6601 some less clueful developer tries to use floating-point anyway. */
6602 if (needed_sseregs && !TARGET_SSE)
6604 if (in_return)
6606 if (!issued_sse_ret_error)
6608 error ("SSE register return with SSE disabled");
6609 issued_sse_ret_error = true;
6612 else if (!issued_sse_arg_error)
6614 error ("SSE register argument with SSE disabled");
6615 issued_sse_arg_error = true;
6617 return NULL;
6620 /* Likewise, error if the ABI requires us to return values in the
6621 x87 registers and the user specified -mno-80387. */
6622 if (!TARGET_80387 && in_return)
6623 for (i = 0; i < n; i++)
6624 if (regclass[i] == X86_64_X87_CLASS
6625 || regclass[i] == X86_64_X87UP_CLASS
6626 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6628 if (!issued_x87_ret_error)
6630 error ("x87 register return with x87 disabled");
6631 issued_x87_ret_error = true;
6633 return NULL;
6636 /* First construct simple cases. Avoid SCmode, since we want to use
6637 a single register to pass this type. */
6638 if (n == 1 && mode != SCmode)
6639 switch (regclass[0])
6641 case X86_64_INTEGER_CLASS:
6642 case X86_64_INTEGERSI_CLASS:
6643 return gen_rtx_REG (mode, intreg[0]);
6644 case X86_64_SSE_CLASS:
6645 case X86_64_SSESF_CLASS:
6646 case X86_64_SSEDF_CLASS:
6647 if (mode != BLKmode)
6648 return gen_reg_or_parallel (mode, orig_mode,
6649 SSE_REGNO (sse_regno));
6650 break;
6651 case X86_64_X87_CLASS:
6652 case X86_64_COMPLEX_X87_CLASS:
6653 return gen_rtx_REG (mode, FIRST_STACK_REG);
6654 case X86_64_NO_CLASS:
6655 /* Zero sized array, struct or class. */
6656 return NULL;
6657 default:
6658 gcc_unreachable ();
6660 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6661 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6662 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6663 if (n == 4
6664 && regclass[0] == X86_64_SSE_CLASS
6665 && regclass[1] == X86_64_SSEUP_CLASS
6666 && regclass[2] == X86_64_SSEUP_CLASS
6667 && regclass[3] == X86_64_SSEUP_CLASS
6668 && mode != BLKmode)
6669 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6671 if (n == 2
6672 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6673 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6674 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6675 && regclass[1] == X86_64_INTEGER_CLASS
6676 && (mode == CDImode || mode == TImode || mode == TFmode)
6677 && intreg[0] + 1 == intreg[1])
6678 return gen_rtx_REG (mode, intreg[0]);
6680 /* Otherwise figure out the entries of the PARALLEL. */
6681 for (i = 0; i < n; i++)
6683 int pos;
6685 switch (regclass[i])
6687 case X86_64_NO_CLASS:
6688 break;
6689 case X86_64_INTEGER_CLASS:
6690 case X86_64_INTEGERSI_CLASS:
6691 /* Merge TImodes on aligned occasions here too. */
6692 if (i * 8 + 8 > bytes)
6693 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6694 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6695 tmpmode = SImode;
6696 else
6697 tmpmode = DImode;
6698 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
6699 if (tmpmode == BLKmode)
6700 tmpmode = DImode;
6701 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6702 gen_rtx_REG (tmpmode, *intreg),
6703 GEN_INT (i*8));
6704 intreg++;
6705 break;
6706 case X86_64_SSESF_CLASS:
6707 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6708 gen_rtx_REG (SFmode,
6709 SSE_REGNO (sse_regno)),
6710 GEN_INT (i*8));
6711 sse_regno++;
6712 break;
6713 case X86_64_SSEDF_CLASS:
6714 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6715 gen_rtx_REG (DFmode,
6716 SSE_REGNO (sse_regno)),
6717 GEN_INT (i*8));
6718 sse_regno++;
6719 break;
6720 case X86_64_SSE_CLASS:
6721 pos = i;
6722 switch (n)
6724 case 1:
6725 tmpmode = DImode;
6726 break;
6727 case 2:
6728 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6730 tmpmode = TImode;
6731 i++;
6733 else
6734 tmpmode = DImode;
6735 break;
6736 case 4:
6737 gcc_assert (i == 0
6738 && regclass[1] == X86_64_SSEUP_CLASS
6739 && regclass[2] == X86_64_SSEUP_CLASS
6740 && regclass[3] == X86_64_SSEUP_CLASS);
6741 tmpmode = OImode;
6742 i += 3;
6743 break;
6744 default:
6745 gcc_unreachable ();
6747 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6748 gen_rtx_REG (tmpmode,
6749 SSE_REGNO (sse_regno)),
6750 GEN_INT (pos*8));
6751 sse_regno++;
6752 break;
6753 default:
6754 gcc_unreachable ();
6758 /* Empty aligned struct, union or class. */
6759 if (nexps == 0)
6760 return NULL;
6762 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6763 for (i = 0; i < nexps; i++)
6764 XVECEXP (ret, 0, i) = exp [i];
6765 return ret;
6768 /* Update the data in CUM to advance over an argument of mode MODE
6769 and data type TYPE. (TYPE is null for libcalls where that information
6770 may not be available.) */
6772 static void
6773 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6774 const_tree type, HOST_WIDE_INT bytes,
6775 HOST_WIDE_INT words)
6777 switch (mode)
6779 default:
6780 break;
6782 case BLKmode:
6783 if (bytes < 0)
6784 break;
6785 /* FALLTHRU */
6787 case DImode:
6788 case SImode:
6789 case HImode:
6790 case QImode:
6791 cum->words += words;
6792 cum->nregs -= words;
6793 cum->regno += words;
6795 if (cum->nregs <= 0)
6797 cum->nregs = 0;
6798 cum->regno = 0;
6800 break;
6802 case OImode:
6803 /* OImode shouldn't be used directly. */
6804 gcc_unreachable ();
6806 case DFmode:
6807 if (cum->float_in_sse < 2)
6808 break;
6809 case SFmode:
6810 if (cum->float_in_sse < 1)
6811 break;
6812 /* FALLTHRU */
6814 case V8SFmode:
6815 case V8SImode:
6816 case V32QImode:
6817 case V16HImode:
6818 case V4DFmode:
6819 case V4DImode:
6820 case TImode:
6821 case V16QImode:
6822 case V8HImode:
6823 case V4SImode:
6824 case V2DImode:
6825 case V4SFmode:
6826 case V2DFmode:
6827 if (!type || !AGGREGATE_TYPE_P (type))
6829 cum->sse_words += words;
6830 cum->sse_nregs -= 1;
6831 cum->sse_regno += 1;
6832 if (cum->sse_nregs <= 0)
6834 cum->sse_nregs = 0;
6835 cum->sse_regno = 0;
6838 break;
6840 case V8QImode:
6841 case V4HImode:
6842 case V2SImode:
6843 case V2SFmode:
6844 case V1TImode:
6845 case V1DImode:
6846 if (!type || !AGGREGATE_TYPE_P (type))
6848 cum->mmx_words += words;
6849 cum->mmx_nregs -= 1;
6850 cum->mmx_regno += 1;
6851 if (cum->mmx_nregs <= 0)
6853 cum->mmx_nregs = 0;
6854 cum->mmx_regno = 0;
6857 break;
6861 static void
6862 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6863 const_tree type, HOST_WIDE_INT words, bool named)
6865 int int_nregs, sse_nregs;
6867 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6868 if (!named && VALID_AVX256_REG_MODE (mode))
6869 return;
6871 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6872 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6874 cum->nregs -= int_nregs;
6875 cum->sse_nregs -= sse_nregs;
6876 cum->regno += int_nregs;
6877 cum->sse_regno += sse_nregs;
6879 else
6881 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6882 cum->words = (cum->words + align - 1) & ~(align - 1);
6883 cum->words += words;
6887 static void
6888 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6889 HOST_WIDE_INT words)
6891 /* Otherwise, this should be passed indirectly. */
6892 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6894 cum->words += words;
6895 if (cum->nregs > 0)
6897 cum->nregs -= 1;
6898 cum->regno += 1;
6902 /* Update the data in CUM to advance over an argument of mode MODE and
6903 data type TYPE. (TYPE is null for libcalls where that information
6904 may not be available.) */
6906 static void
6907 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6908 const_tree type, bool named)
6910 HOST_WIDE_INT bytes, words;
6912 if (mode == BLKmode)
6913 bytes = int_size_in_bytes (type);
6914 else
6915 bytes = GET_MODE_SIZE (mode);
6916 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6918 if (type)
6919 mode = type_natural_mode (type, NULL);
6921 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6922 function_arg_advance_ms_64 (cum, bytes, words);
6923 else if (TARGET_64BIT)
6924 function_arg_advance_64 (cum, mode, type, words, named);
6925 else
6926 function_arg_advance_32 (cum, mode, type, bytes, words);
6929 /* Define where to put the arguments to a function.
6930 Value is zero to push the argument on the stack,
6931 or a hard register in which to store the argument.
6933 MODE is the argument's machine mode.
6934 TYPE is the data type of the argument (as a tree).
6935 This is null for libcalls where that information may
6936 not be available.
6937 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6938 the preceding args and about the function being called.
6939 NAMED is nonzero if this argument is a named parameter
6940 (otherwise it is an extra parameter matching an ellipsis). */
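/* Illustrative example (assumed, not from the original source): in 32-bit
   code compiled with -mregparm=3, the first three integer-sized arguments
   of `int f (int a, int b, int c, int d);' are passed in %eax, %edx and
   %ecx, and D goes on the stack; with fastcall the first register is %ecx
   instead of %eax, as handled below.  */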
6942 static rtx
6943 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6944 enum machine_mode orig_mode, const_tree type,
6945 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6947 static bool warnedsse, warnedmmx;
6949 /* Avoid the AL settings for the Unix64 ABI. */
6950 if (mode == VOIDmode)
6951 return constm1_rtx;
6953 switch (mode)
6955 default:
6956 break;
6958 case BLKmode:
6959 if (bytes < 0)
6960 break;
6961 /* FALLTHRU */
6962 case DImode:
6963 case SImode:
6964 case HImode:
6965 case QImode:
6966 if (words <= cum->nregs)
6968 int regno = cum->regno;
6970 /* Fastcall allocates the first two DWORD (SImode) or
6971 smaller arguments to ECX and EDX if the argument isn't an
6972 aggregate type. */
6973 if (cum->fastcall)
6975 if (mode == BLKmode
6976 || mode == DImode
6977 || (type && AGGREGATE_TYPE_P (type)))
6978 break;
6980 /* ECX, not EAX, is the first allocated register. */
6981 if (regno == AX_REG)
6982 regno = CX_REG;
6984 return gen_rtx_REG (mode, regno);
6986 break;
6988 case DFmode:
6989 if (cum->float_in_sse < 2)
6990 break;
6991 case SFmode:
6992 if (cum->float_in_sse < 1)
6993 break;
6994 /* FALLTHRU */
6995 case TImode:
6996 /* In 32bit, we pass TImode in xmm registers. */
6997 case V16QImode:
6998 case V8HImode:
6999 case V4SImode:
7000 case V2DImode:
7001 case V4SFmode:
7002 case V2DFmode:
7003 if (!type || !AGGREGATE_TYPE_P (type))
7005 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
7007 warnedsse = true;
7008 warning (0, "SSE vector argument without SSE enabled "
7009 "changes the ABI");
7011 if (cum->sse_nregs)
7012 return gen_reg_or_parallel (mode, orig_mode,
7013 cum->sse_regno + FIRST_SSE_REG);
7015 break;
7017 case OImode:
7018 /* OImode shouldn't be used directly. */
7019 gcc_unreachable ();
7021 case V8SFmode:
7022 case V8SImode:
7023 case V32QImode:
7024 case V16HImode:
7025 case V4DFmode:
7026 case V4DImode:
7027 if (!type || !AGGREGATE_TYPE_P (type))
7029 if (cum->sse_nregs)
7030 return gen_reg_or_parallel (mode, orig_mode,
7031 cum->sse_regno + FIRST_SSE_REG);
7033 break;
7035 case V8QImode:
7036 case V4HImode:
7037 case V2SImode:
7038 case V2SFmode:
7039 case V1TImode:
7040 case V1DImode:
7041 if (!type || !AGGREGATE_TYPE_P (type))
7043 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
7045 warnedmmx = true;
7046 warning (0, "MMX vector argument without MMX enabled "
7047 "changes the ABI");
7049 if (cum->mmx_nregs)
7050 return gen_reg_or_parallel (mode, orig_mode,
7051 cum->mmx_regno + FIRST_MMX_REG);
7053 break;
7056 return NULL_RTX;
7059 static rtx
7060 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7061 enum machine_mode orig_mode, const_tree type, bool named)
7063 /* Handle a hidden AL argument containing number of registers
7064 for varargs x86-64 functions. */
7065 if (mode == VOIDmode)
7066 return GEN_INT (cum->maybe_vaarg
7067 ? (cum->sse_nregs < 0
7068 ? X86_64_SSE_REGPARM_MAX
7069 : cum->sse_regno)
7070 : -1);
7072 switch (mode)
7074 default:
7075 break;
7077 case V8SFmode:
7078 case V8SImode:
7079 case V32QImode:
7080 case V16HImode:
7081 case V4DFmode:
7082 case V4DImode:
7083 /* Unnamed 256bit vector mode parameters are passed on the stack. */
7084 if (!named)
7085 return NULL;
7086 break;
7089 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7090 cum->sse_nregs,
7091 &x86_64_int_parameter_registers [cum->regno],
7092 cum->sse_regno);
7095 static rtx
7096 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7097 enum machine_mode orig_mode, bool named,
7098 HOST_WIDE_INT bytes)
7100 unsigned int regno;
7102 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7103 We use a value of -2 to specify that the current function call is MS ABI. */
7104 if (mode == VOIDmode)
7105 return GEN_INT (-2);
7107 /* If we've run out of registers, it goes on the stack. */
7108 if (cum->nregs == 0)
7109 return NULL_RTX;
7111 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7113 /* Only floating point modes are passed in anything but integer regs. */
7114 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7116 if (named)
7117 regno = cum->regno + FIRST_SSE_REG;
7118 else
7120 rtx t1, t2;
7122 /* Unnamed floating parameters are passed in both the
7123 SSE and integer registers. */
7124 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7125 t2 = gen_rtx_REG (mode, regno);
7126 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7127 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7128 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7131 /* Handle aggregate types passed in registers. */
7132 if (orig_mode == BLKmode)
7134 if (bytes > 0 && bytes <= 8)
7135 mode = (bytes > 4 ? DImode : SImode);
7136 if (mode == BLKmode)
7137 mode = DImode;
7140 return gen_reg_or_parallel (mode, orig_mode, regno);
7143 /* Return where to put the arguments to a function.
7144 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7146 MODE is the argument's machine mode. TYPE is the data type of the
7147 argument. It is null for libcalls where that information may not be
7148 available. CUM gives information about the preceding args and about
7149 the function being called. NAMED is nonzero if this argument is a
7150 named parameter (otherwise it is an extra parameter matching an
7151 ellipsis). */
7153 static rtx
7154 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
7155 const_tree type, bool named)
7157 enum machine_mode mode = omode;
7158 HOST_WIDE_INT bytes, words;
7159 rtx arg;
7161 if (mode == BLKmode)
7162 bytes = int_size_in_bytes (type);
7163 else
7164 bytes = GET_MODE_SIZE (mode);
7165 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7167 /* To simplify the code below, represent vector types with a vector mode
7168 even if MMX/SSE are not active. */
7169 if (type && TREE_CODE (type) == VECTOR_TYPE)
7170 mode = type_natural_mode (type, cum);
7172 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7173 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7174 else if (TARGET_64BIT)
7175 arg = function_arg_64 (cum, mode, omode, type, named);
7176 else
7177 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7179 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
7181 /* This argument uses 256bit AVX modes. */
7182 if (cum->caller)
7183 cfun->machine->callee_pass_avx256_p = true;
7184 else
7185 cfun->machine->caller_pass_avx256_p = true;
7188 return arg;
7191 /* A C expression that indicates when an argument must be passed by
7192 reference. If nonzero for an argument, a copy of that argument is
7193 made in memory and a pointer to the argument is passed instead of
7194 the argument itself. The pointer is passed in whatever way is
7195 appropriate for passing a pointer to that type. */
7197 static bool
7198 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
7199 enum machine_mode mode ATTRIBUTE_UNUSED,
7200 const_tree type, bool named ATTRIBUTE_UNUSED)
7202 /* See Windows x64 Software Convention. */
7203 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7205 int msize = (int) GET_MODE_SIZE (mode);
7206 if (type)
7208 /* Arrays are passed by reference. */
7209 if (TREE_CODE (type) == ARRAY_TYPE)
7210 return true;
7212 if (AGGREGATE_TYPE_P (type))
7214 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7215 are passed by reference. */
7216 msize = int_size_in_bytes (type);
7220 /* __m128 is passed by reference. */
7221 switch (msize) {
7222 case 1: case 2: case 4: case 8:
7223 break;
7224 default:
7225 return true;
7228 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7229 return 1;
7231 return 0;
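/* A minimal illustration, not part of the original file: under the Windows
   x64 convention handled above, only aggregates of exactly 1, 2, 4 or 8
   bytes travel by value; everything else goes through a hidden pointer.

     struct by_value { long long x; };
     struct by_ref   { char c[3]; };
     struct also_ref { double a, b; };

   by_value (8 bytes) is passed directly in a register, while by_ref
   (3 bytes) and also_ref (16 bytes) are copied by the caller and passed by
   reference, and arrays are passed by reference regardless of their size.  */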
7234 /* Return true when TYPE should be 128bit aligned for 32bit argument
7235 passing ABI. XXX: This function is obsolete and is only used for
7236 checking psABI compatibility with previous versions of GCC. */
7238 static bool
7239 ix86_compat_aligned_value_p (const_tree type)
7241 enum machine_mode mode = TYPE_MODE (type);
7242 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7243 || mode == TDmode
7244 || mode == TFmode
7245 || mode == TCmode)
7246 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7247 return true;
7248 if (TYPE_ALIGN (type) < 128)
7249 return false;
7251 if (AGGREGATE_TYPE_P (type))
7253 /* Walk the aggregates recursively. */
7254 switch (TREE_CODE (type))
7256 case RECORD_TYPE:
7257 case UNION_TYPE:
7258 case QUAL_UNION_TYPE:
7260 tree field;
7262 /* Walk all the structure fields. */
7263 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7265 if (TREE_CODE (field) == FIELD_DECL
7266 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7267 return true;
7269 break;
7272 case ARRAY_TYPE:
7273 /* Just for use if some languages pass arrays by value. */
7274 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7275 return true;
7276 break;
7278 default:
7279 gcc_unreachable ();
7282 return false;
7285 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7286 XXX: This function is obsolete and is only used for checking psABI
7287 compatibility with previous versions of GCC. */
7289 static unsigned int
7290 ix86_compat_function_arg_boundary (enum machine_mode mode,
7291 const_tree type, unsigned int align)
7293 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7294 natural boundaries. */
7295 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7297 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7298 make an exception for SSE modes since these require 128bit
7299 alignment.
7301 The handling here differs from field_alignment. ICC aligns MMX
7302 arguments to 4 byte boundaries, while structure fields are aligned
7303 to 8 byte boundaries. */
7304 if (!type)
7306 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7307 align = PARM_BOUNDARY;
7309 else
7311 if (!ix86_compat_aligned_value_p (type))
7312 align = PARM_BOUNDARY;
7315 if (align > BIGGEST_ALIGNMENT)
7316 align = BIGGEST_ALIGNMENT;
7317 return align;
7320 /* Return true when TYPE should be 128bit aligned for 32bit argument
7321 passing ABI. */
7323 static bool
7324 ix86_contains_aligned_value_p (const_tree type)
7326 enum machine_mode mode = TYPE_MODE (type);
7328 if (mode == XFmode || mode == XCmode)
7329 return false;
7331 if (TYPE_ALIGN (type) < 128)
7332 return false;
7334 if (AGGREGATE_TYPE_P (type))
7336 /* Walk the aggregates recursively. */
7337 switch (TREE_CODE (type))
7339 case RECORD_TYPE:
7340 case UNION_TYPE:
7341 case QUAL_UNION_TYPE:
7343 tree field;
7345 /* Walk all the structure fields. */
7346 for (field = TYPE_FIELDS (type);
7347 field;
7348 field = DECL_CHAIN (field))
7350 if (TREE_CODE (field) == FIELD_DECL
7351 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7352 return true;
7354 break;
7357 case ARRAY_TYPE:
7358 /* Just for use if some languages pass arrays by value. */
7359 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7360 return true;
7361 break;
7363 default:
7364 gcc_unreachable ();
7367 else
7368 return TYPE_ALIGN (type) >= 128;
7370 return false;
7373 /* Gives the alignment boundary, in bits, of an argument with the
7374 specified mode and type. */
7376 static unsigned int
7377 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7379 unsigned int align;
7380 if (type)
7382 /* Since the main variant type is used for the call, convert the
7383 argument type to its main variant. */
7384 type = TYPE_MAIN_VARIANT (type);
7385 align = TYPE_ALIGN (type);
7387 else
7388 align = GET_MODE_ALIGNMENT (mode);
7389 if (align < PARM_BOUNDARY)
7390 align = PARM_BOUNDARY;
7391 else
7393 static bool warned;
7394 unsigned int saved_align = align;
7396 if (!TARGET_64BIT)
7398 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7399 if (!type)
7401 if (mode == XFmode || mode == XCmode)
7402 align = PARM_BOUNDARY;
7404 else if (!ix86_contains_aligned_value_p (type))
7405 align = PARM_BOUNDARY;
7407 if (align < 128)
7408 align = PARM_BOUNDARY;
7411 if (warn_psabi
7412 && !warned
7413 && align != ix86_compat_function_arg_boundary (mode, type,
7414 saved_align))
7416 warned = true;
7417 inform (input_location,
7418 "The ABI for passing parameters with %d-byte"
7419 " alignment has changed in GCC 4.6",
7420 align / BITS_PER_UNIT);
7424 return align;
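/* An illustrative trigger for the GCC 4.6 note above, not taken from the
   sources themselves: with -m32, a scalar type carrying extended alignment

     typedef double over_aligned __attribute__ ((aligned (16)));

     void callee (int a, over_aligned b);

   now gets a 16-byte argument boundary from ix86_contains_aligned_value_p,
   whereas ix86_compat_function_arg_boundary still computes PARM_BOUNDARY,
   so the inform above can point out the changed parameter layout.  */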
7427 /* Return true if N is a possible register number of function value. */
7429 static bool
7430 ix86_function_value_regno_p (const unsigned int regno)
7432 switch (regno)
7434 case 0:
7435 return true;
7437 case FIRST_FLOAT_REG:
7438 /* TODO: The function should depend on current function ABI but
7439 builtins.c would need updating then. Therefore we use the
7440 default ABI. */
7441 if (TARGET_64BIT && ix86_abi == MS_ABI)
7442 return false;
7443 return TARGET_FLOAT_RETURNS_IN_80387;
7445 case FIRST_SSE_REG:
7446 return TARGET_SSE;
7448 case FIRST_MMX_REG:
7449 if (TARGET_MACHO || TARGET_64BIT)
7450 return false;
7451 return TARGET_MMX;
7454 return false;
7457 /* Define how to find the value returned by a function.
7458 VALTYPE is the data type of the value (as a tree).
7459 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7460 otherwise, FUNC is 0. */
7462 static rtx
7463 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7464 const_tree fntype, const_tree fn)
7466 unsigned int regno;
7468 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7469 we normally prevent this case when mmx is not available. However
7470 some ABIs may require the result to be returned like DImode. */
7471 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7472 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
7474 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7475 we prevent this case when sse is not available. However some ABIs
7476 may require the result to be returned like integer TImode. */
7477 else if (mode == TImode
7478 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7479 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
7481 /* 32-byte vector modes in %ymm0. */
7482 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7483 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
7485 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7486 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7487 regno = FIRST_FLOAT_REG;
7488 else
7489 /* Most things go in %eax. */
7490 regno = AX_REG;
7492 /* Override FP return register with %xmm0 for local functions when
7493 SSE math is enabled or for functions with sseregparm attribute. */
7494 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7496 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7497 if ((sse_level >= 1 && mode == SFmode)
7498 || (sse_level == 2 && mode == DFmode))
7499 regno = FIRST_SSE_REG;
7502 /* OImode shouldn't be used directly. */
7503 gcc_assert (mode != OImode);
7505 return gen_rtx_REG (orig_mode, regno);
7508 static rtx
7509 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7510 const_tree valtype)
7512 rtx ret;
7514 /* Handle libcalls, which don't provide a type node. */
7515 if (valtype == NULL)
7517 switch (mode)
7519 case SFmode:
7520 case SCmode:
7521 case DFmode:
7522 case DCmode:
7523 case TFmode:
7524 case SDmode:
7525 case DDmode:
7526 case TDmode:
7527 return gen_rtx_REG (mode, FIRST_SSE_REG);
7528 case XFmode:
7529 case XCmode:
7530 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
7531 case TCmode:
7532 return NULL;
7533 default:
7534 return gen_rtx_REG (mode, AX_REG);
7538 ret = construct_container (mode, orig_mode, valtype, 1,
7539 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7540 x86_64_int_return_registers, 0);
7542 /* For zero sized structures, construct_container returns NULL, but we
7543 need to keep rest of compiler happy by returning meaningful value. */
7544 if (!ret)
7545 ret = gen_rtx_REG (orig_mode, AX_REG);
7547 return ret;
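/* An illustrative summary, not generated by this function alone, of where a
   few SysV x86-64 return values land:

     long ret_long (void);
     double ret_double (void);
     long double ret_ld (void);
     struct pair { double a, b; };
     struct pair ret_pair (void);

   ret_long comes back in RAX, ret_double in XMM0, ret_ld in ST(0) (the
   XFmode case in the libcall switch above), and ret_pair in the XMM0/XMM1
   pair described by the container that construct_container builds.  */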
7550 static rtx
7551 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7553 unsigned int regno = AX_REG;
7555 if (TARGET_SSE)
7557 switch (GET_MODE_SIZE (mode))
7559 case 16:
7560 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7561 && !COMPLEX_MODE_P (mode))
7562 regno = FIRST_SSE_REG;
7563 break;
7564 case 8:
7565 case 4:
7566 if (mode == SFmode || mode == DFmode)
7567 regno = FIRST_SSE_REG;
7568 break;
7569 default:
7570 break;
7573 return gen_rtx_REG (orig_mode, regno);
7576 static rtx
7577 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7578 enum machine_mode orig_mode, enum machine_mode mode)
7580 const_tree fn, fntype;
7582 fn = NULL_TREE;
7583 if (fntype_or_decl && DECL_P (fntype_or_decl))
7584 fn = fntype_or_decl;
7585 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7587 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7588 return function_value_ms_64 (orig_mode, mode);
7589 else if (TARGET_64BIT)
7590 return function_value_64 (orig_mode, mode, valtype);
7591 else
7592 return function_value_32 (orig_mode, mode, fntype, fn);
7595 static rtx
7596 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7597 bool outgoing ATTRIBUTE_UNUSED)
7599 enum machine_mode mode, orig_mode;
7601 orig_mode = TYPE_MODE (valtype);
7602 mode = type_natural_mode (valtype, NULL);
7603 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7607 ix86_libcall_value (enum machine_mode mode)
7609 return ix86_function_value_1 (NULL, NULL, mode, mode);
7612 /* Return true iff type is returned in memory. */
7614 static bool ATTRIBUTE_UNUSED
7615 return_in_memory_32 (const_tree type, enum machine_mode mode)
7617 HOST_WIDE_INT size;
7619 if (mode == BLKmode)
7620 return true;
7622 size = int_size_in_bytes (type);
7624 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7625 return false;
7627 if (VECTOR_MODE_P (mode) || mode == TImode)
7629 /* User-created vectors small enough to fit in EAX. */
7630 if (size < 8)
7631 return false;
7633 /* MMX/3dNow values are returned in MM0,
7634 except when it doesn't exist or the ABI prescribes otherwise. */
7635 if (size == 8)
7636 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7638 /* SSE values are returned in XMM0, except when it doesn't exist. */
7639 if (size == 16)
7640 return !TARGET_SSE;
7642 /* AVX values are returned in YMM0, except when it doesn't exist. */
7643 if (size == 32)
7644 return !TARGET_AVX;
7647 if (mode == XFmode)
7648 return false;
7650 if (size > 12)
7651 return true;
7653 /* OImode shouldn't be used directly. */
7654 gcc_assert (mode != OImode);
7656 return false;
7659 static bool ATTRIBUTE_UNUSED
7660 return_in_memory_64 (const_tree type, enum machine_mode mode)
7662 int needed_intregs, needed_sseregs;
7663 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7666 static bool ATTRIBUTE_UNUSED
7667 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7669 HOST_WIDE_INT size = int_size_in_bytes (type);
7671 /* __m128 is returned in xmm0. */
7672 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7673 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7674 return false;
7676 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
7677 return size != 1 && size != 2 && size != 4 && size != 8;
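/* An illustrative sketch, not from the GCC sources, of what the predicate
   above decides for Win64 return values:

     struct s8 { long long v; };
     struct s3 { char c[3]; };

   s8 (8 bytes, an integer mode) comes back in RAX, s3 (3 bytes) is written
   through a caller-supplied hidden pointer, and a 16-byte vector type such
   as __m128 comes back in XMM0.  */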
7680 static bool
7681 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7683 #ifdef SUBTARGET_RETURN_IN_MEMORY
7684 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7685 #else
7686 const enum machine_mode mode = type_natural_mode (type, NULL);
7688 if (TARGET_64BIT)
7690 if (ix86_function_type_abi (fntype) == MS_ABI)
7691 return return_in_memory_ms_64 (type, mode);
7692 else
7693 return return_in_memory_64 (type, mode);
7695 else
7696 return return_in_memory_32 (type, mode);
7697 #endif
7700 /* When returning SSE vector types, we have a choice of either
7701 (1) being abi incompatible with a -march switch, or
7702 (2) generating an error.
7703 Given no good solution, I think the safest thing is one warning.
7704 The user won't be able to use -Werror, but....
7706 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7707 called in response to actually generating a caller or callee that
7708 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7709 via aggregate_value_p for general type probing from tree-ssa. */
7711 static rtx
7712 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7714 static bool warnedsse, warnedmmx;
7716 if (!TARGET_64BIT && type)
7718 /* Look at the return type of the function, not the function type. */
7719 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7721 if (!TARGET_SSE && !warnedsse)
7723 if (mode == TImode
7724 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7726 warnedsse = true;
7727 warning (0, "SSE vector return without SSE enabled "
7728 "changes the ABI");
7732 if (!TARGET_MMX && !warnedmmx)
7734 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7736 warnedmmx = true;
7737 warning (0, "MMX vector return without MMX enabled "
7738 "changes the ABI");
7743 return NULL;
7747 /* Create the va_list data type. */
7749 /* Return the calling convention specific va_list data type.
7750 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7752 static tree
7753 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7755 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7757 /* For i386 we use plain pointer to argument area. */
7758 if (!TARGET_64BIT || abi == MS_ABI)
7759 return build_pointer_type (char_type_node);
7761 record = lang_hooks.types.make_type (RECORD_TYPE);
7762 type_decl = build_decl (BUILTINS_LOCATION,
7763 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7765 f_gpr = build_decl (BUILTINS_LOCATION,
7766 FIELD_DECL, get_identifier ("gp_offset"),
7767 unsigned_type_node);
7768 f_fpr = build_decl (BUILTINS_LOCATION,
7769 FIELD_DECL, get_identifier ("fp_offset"),
7770 unsigned_type_node);
7771 f_ovf = build_decl (BUILTINS_LOCATION,
7772 FIELD_DECL, get_identifier ("overflow_arg_area"),
7773 ptr_type_node);
7774 f_sav = build_decl (BUILTINS_LOCATION,
7775 FIELD_DECL, get_identifier ("reg_save_area"),
7776 ptr_type_node);
7778 va_list_gpr_counter_field = f_gpr;
7779 va_list_fpr_counter_field = f_fpr;
7781 DECL_FIELD_CONTEXT (f_gpr) = record;
7782 DECL_FIELD_CONTEXT (f_fpr) = record;
7783 DECL_FIELD_CONTEXT (f_ovf) = record;
7784 DECL_FIELD_CONTEXT (f_sav) = record;
7786 TYPE_STUB_DECL (record) = type_decl;
7787 TYPE_NAME (record) = type_decl;
7788 TYPE_FIELDS (record) = f_gpr;
7789 DECL_CHAIN (f_gpr) = f_fpr;
7790 DECL_CHAIN (f_fpr) = f_ovf;
7791 DECL_CHAIN (f_ovf) = f_sav;
7793 layout_type (record);
7795 /* The correct type is an array type of one element. */
7796 return build_array_type (record, build_index_type (size_zero_node));
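/* For reference only, a C-level sketch of the record built above for the
   SysV side; the field names are the real ones, the typedef name is just
   illustrative:

     typedef struct sketch_va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } sketch_va_list[1];

   i.e. an array of one element, matching the build_array_type call.  */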
7799 /* Setup the builtin va_list data type and for 64-bit the additional
7800 calling convention specific va_list data types. */
7802 static tree
7803 ix86_build_builtin_va_list (void)
7805 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7807 /* Initialize abi specific va_list builtin types. */
7808 if (TARGET_64BIT)
7810 tree t;
7811 if (ix86_abi == MS_ABI)
7813 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7814 if (TREE_CODE (t) != RECORD_TYPE)
7815 t = build_variant_type_copy (t);
7816 sysv_va_list_type_node = t;
7818 else
7820 t = ret;
7821 if (TREE_CODE (t) != RECORD_TYPE)
7822 t = build_variant_type_copy (t);
7823 sysv_va_list_type_node = t;
7825 if (ix86_abi != MS_ABI)
7827 t = ix86_build_builtin_va_list_abi (MS_ABI);
7828 if (TREE_CODE (t) != RECORD_TYPE)
7829 t = build_variant_type_copy (t);
7830 ms_va_list_type_node = t;
7832 else
7834 t = ret;
7835 if (TREE_CODE (t) != RECORD_TYPE)
7836 t = build_variant_type_copy (t);
7837 ms_va_list_type_node = t;
7841 return ret;
7844 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7846 static void
7847 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7849 rtx save_area, mem;
7850 alias_set_type set;
7851 int i, max;
7853 /* GPR size of varargs save area. */
7854 if (cfun->va_list_gpr_size)
7855 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7856 else
7857 ix86_varargs_gpr_size = 0;
7859 /* FPR size of varargs save area. We don't need it if we don't pass
7860 anything in SSE registers. */
7861 if (TARGET_SSE && cfun->va_list_fpr_size)
7862 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7863 else
7864 ix86_varargs_fpr_size = 0;
7866 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7867 return;
7869 save_area = frame_pointer_rtx;
7870 set = get_varargs_alias_set ();
7872 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7873 if (max > X86_64_REGPARM_MAX)
7874 max = X86_64_REGPARM_MAX;
7876 for (i = cum->regno; i < max; i++)
7878 mem = gen_rtx_MEM (Pmode,
7879 plus_constant (save_area, i * UNITS_PER_WORD));
7880 MEM_NOTRAP_P (mem) = 1;
7881 set_mem_alias_set (mem, set);
7882 emit_move_insn (mem, gen_rtx_REG (Pmode,
7883 x86_64_int_parameter_registers[i]));
7886 if (ix86_varargs_fpr_size)
7888 enum machine_mode smode;
7889 rtx label, test;
7891 /* Now emit code to save SSE registers. The AX parameter contains the number
7892 of SSE parameter registers used to call this function, though all we
7893 actually check here is the zero/non-zero status. */
7895 label = gen_label_rtx ();
7896 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7897 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7898 label));
7900 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7901 we used movdqa (i.e. TImode) instead? Perhaps even better would
7902 be if we could determine the real mode of the data, via a hook
7903 into pass_stdarg. Ignore all that for now. */
7904 smode = V4SFmode;
7905 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7906 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7908 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7909 if (max > X86_64_SSE_REGPARM_MAX)
7910 max = X86_64_SSE_REGPARM_MAX;
7912 for (i = cum->sse_regno; i < max; ++i)
7914 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7915 mem = gen_rtx_MEM (smode, mem);
7916 MEM_NOTRAP_P (mem) = 1;
7917 set_mem_alias_set (mem, set);
7918 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7920 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7923 emit_label (label);
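/* A sketch of the resulting register save area when both halves are in use
   (SysV x86-64, offsets relative to the start of the area; illustrative
   only):

     bytes   0 ..  47   rdi, rsi, rdx, rcx, r8, r9   (6 GPRs * 8 bytes)
     bytes  48 .. 175   xmm0 .. xmm7                 (8 XMMs * 16 bytes)

   which is why ix86_va_start below seeds fp_offset with n_fpr * 16 plus
   8 * X86_64_REGPARM_MAX.  */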
7927 static void
7928 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7930 alias_set_type set = get_varargs_alias_set ();
7931 int i;
7933 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7935 rtx reg, mem;
7937 mem = gen_rtx_MEM (Pmode,
7938 plus_constant (virtual_incoming_args_rtx,
7939 i * UNITS_PER_WORD));
7940 MEM_NOTRAP_P (mem) = 1;
7941 set_mem_alias_set (mem, set);
7943 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7944 emit_move_insn (mem, reg);
7948 static void
7949 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7950 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7951 int no_rtl)
7953 CUMULATIVE_ARGS next_cum;
7954 tree fntype;
7956 /* This argument doesn't appear to be used anymore. Which is good,
7957 because the old code here didn't suppress rtl generation. */
7958 gcc_assert (!no_rtl);
7960 if (!TARGET_64BIT)
7961 return;
7963 fntype = TREE_TYPE (current_function_decl);
7965 /* For varargs, we do not want to skip the dummy va_dcl argument.
7966 For stdargs, we do want to skip the last named argument. */
7967 next_cum = *cum;
7968 if (stdarg_p (fntype))
7969 ix86_function_arg_advance (&next_cum, mode, type, true);
7971 if (cum->call_abi == MS_ABI)
7972 setup_incoming_varargs_ms_64 (&next_cum);
7973 else
7974 setup_incoming_varargs_64 (&next_cum);
7977 /* Check whether TYPE is a va_list of the plain char * kind. */
7979 static bool
7980 is_va_list_char_pointer (tree type)
7982 tree canonic;
7984 /* For 32-bit it is always true. */
7985 if (!TARGET_64BIT)
7986 return true;
7987 canonic = ix86_canonical_va_list_type (type);
7988 return (canonic == ms_va_list_type_node
7989 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7992 /* Implement va_start. */
7994 static void
7995 ix86_va_start (tree valist, rtx nextarg)
7997 HOST_WIDE_INT words, n_gpr, n_fpr;
7998 tree f_gpr, f_fpr, f_ovf, f_sav;
7999 tree gpr, fpr, ovf, sav, t;
8000 tree type;
8001 rtx ovf_rtx;
8003 if (flag_split_stack
8004 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8006 unsigned int scratch_regno;
8008 /* When we are splitting the stack, we can't refer to the stack
8009 arguments using internal_arg_pointer, because they may be on
8010 the old stack. The split stack prologue will arrange to
8011 leave a pointer to the old stack arguments in a scratch
8012 register, which we here copy to a pseudo-register. The split
8013 stack prologue can't set the pseudo-register directly because
8014 it (the prologue) runs before any registers have been saved. */
8016 scratch_regno = split_stack_prologue_scratch_regno ();
8017 if (scratch_regno != INVALID_REGNUM)
8019 rtx reg, seq;
8021 reg = gen_reg_rtx (Pmode);
8022 cfun->machine->split_stack_varargs_pointer = reg;
8024 start_sequence ();
8025 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8026 seq = get_insns ();
8027 end_sequence ();
8029 push_topmost_sequence ();
8030 emit_insn_after (seq, entry_of_function ());
8031 pop_topmost_sequence ();
8035 /* Only 64bit target needs something special. */
8036 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8038 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8039 std_expand_builtin_va_start (valist, nextarg);
8040 else
8042 rtx va_r, next;
8044 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8045 next = expand_binop (ptr_mode, add_optab,
8046 cfun->machine->split_stack_varargs_pointer,
8047 crtl->args.arg_offset_rtx,
8048 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8049 convert_move (va_r, next, 0);
8051 return;
8054 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8055 f_fpr = DECL_CHAIN (f_gpr);
8056 f_ovf = DECL_CHAIN (f_fpr);
8057 f_sav = DECL_CHAIN (f_ovf);
8059 valist = build_simple_mem_ref (valist);
8060 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8061 /* The following should be folded into the MEM_REF offset. */
8062 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8063 f_gpr, NULL_TREE);
8064 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8065 f_fpr, NULL_TREE);
8066 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8067 f_ovf, NULL_TREE);
8068 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8069 f_sav, NULL_TREE);
8071 /* Count number of gp and fp argument registers used. */
8072 words = crtl->args.info.words;
8073 n_gpr = crtl->args.info.regno;
8074 n_fpr = crtl->args.info.sse_regno;
8076 if (cfun->va_list_gpr_size)
8078 type = TREE_TYPE (gpr);
8079 t = build2 (MODIFY_EXPR, type,
8080 gpr, build_int_cst (type, n_gpr * 8));
8081 TREE_SIDE_EFFECTS (t) = 1;
8082 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8085 if (TARGET_SSE && cfun->va_list_fpr_size)
8087 type = TREE_TYPE (fpr);
8088 t = build2 (MODIFY_EXPR, type, fpr,
8089 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8090 TREE_SIDE_EFFECTS (t) = 1;
8091 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8094 /* Find the overflow area. */
8095 type = TREE_TYPE (ovf);
8096 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8097 ovf_rtx = crtl->args.internal_arg_pointer;
8098 else
8099 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8100 t = make_tree (type, ovf_rtx);
8101 if (words != 0)
8102 t = build2 (POINTER_PLUS_EXPR, type, t,
8103 size_int (words * UNITS_PER_WORD));
8104 t = build2 (MODIFY_EXPR, type, ovf, t);
8105 TREE_SIDE_EFFECTS (t) = 1;
8106 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8108 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8110 /* Find the register save area.
8111 The function prologue saves it right above the stack frame. */
8112 type = TREE_TYPE (sav);
8113 t = make_tree (type, frame_pointer_rtx);
8114 if (!ix86_varargs_gpr_size)
8115 t = build2 (POINTER_PLUS_EXPR, type, t,
8116 size_int (-8 * X86_64_REGPARM_MAX));
8117 t = build2 (MODIFY_EXPR, type, sav, t);
8118 TREE_SIDE_EFFECTS (t) = 1;
8119 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
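/* Roughly, and only as an illustration with names borrowed from the
   va_list record above, the expansion just emitted behaves like

     ap->gp_offset = n_gpr * 8;
     ap->fp_offset = 8 * X86_64_REGPARM_MAX + n_fpr * 16;
     ap->overflow_arg_area = incoming_stack_args + words * UNITS_PER_WORD;
     ap->reg_save_area = register_save_area;

   where n_gpr/n_fpr count the integer and SSE registers already consumed
   by the named parameters, and incoming_stack_args/register_save_area are
   placeholders for the rtx values computed above.  */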
8123 /* Implement va_arg. */
8125 static tree
8126 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8127 gimple_seq *post_p)
8129 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8130 tree f_gpr, f_fpr, f_ovf, f_sav;
8131 tree gpr, fpr, ovf, sav, t;
8132 int size, rsize;
8133 tree lab_false, lab_over = NULL_TREE;
8134 tree addr, t2;
8135 rtx container;
8136 int indirect_p = 0;
8137 tree ptrtype;
8138 enum machine_mode nat_mode;
8139 unsigned int arg_boundary;
8141 /* Only 64bit target needs something special. */
8142 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8143 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8145 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8146 f_fpr = DECL_CHAIN (f_gpr);
8147 f_ovf = DECL_CHAIN (f_fpr);
8148 f_sav = DECL_CHAIN (f_ovf);
8150 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8151 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8152 valist = build_va_arg_indirect_ref (valist);
8153 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8154 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8155 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8157 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8158 if (indirect_p)
8159 type = build_pointer_type (type);
8160 size = int_size_in_bytes (type);
8161 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8163 nat_mode = type_natural_mode (type, NULL);
8164 switch (nat_mode)
8166 case V8SFmode:
8167 case V8SImode:
8168 case V32QImode:
8169 case V16HImode:
8170 case V4DFmode:
8171 case V4DImode:
8172 /* Unnamed 256bit vector mode parameters are passed on stack. */
8173 if (ix86_cfun_abi () == SYSV_ABI)
8175 container = NULL;
8176 break;
8179 default:
8180 container = construct_container (nat_mode, TYPE_MODE (type),
8181 type, 0, X86_64_REGPARM_MAX,
8182 X86_64_SSE_REGPARM_MAX, intreg,
8184 break;
8187 /* Pull the value out of the saved registers. */
8189 addr = create_tmp_var (ptr_type_node, "addr");
8191 if (container)
8193 int needed_intregs, needed_sseregs;
8194 bool need_temp;
8195 tree int_addr, sse_addr;
8197 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8198 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8200 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8202 need_temp = (!REG_P (container)
8203 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8204 || TYPE_ALIGN (type) > 128));
8206 /* In case we are passing a structure, verify that it is a consecutive block
8207 in the register save area. If not, we need to do moves. */
8208 if (!need_temp && !REG_P (container))
8211 /* Verify that all registers are strictly consecutive. */
8211 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8213 int i;
8215 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8217 rtx slot = XVECEXP (container, 0, i);
8218 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8219 || INTVAL (XEXP (slot, 1)) != i * 16)
8220 need_temp = 1;
8223 else
8225 int i;
8227 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8229 rtx slot = XVECEXP (container, 0, i);
8230 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8231 || INTVAL (XEXP (slot, 1)) != i * 8)
8232 need_temp = 1;
8236 if (!need_temp)
8238 int_addr = addr;
8239 sse_addr = addr;
8241 else
8243 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8244 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8247 /* First ensure that we fit completely in registers. */
8248 if (needed_intregs)
8250 t = build_int_cst (TREE_TYPE (gpr),
8251 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8252 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8253 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8254 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8255 gimplify_and_add (t, pre_p);
8257 if (needed_sseregs)
8259 t = build_int_cst (TREE_TYPE (fpr),
8260 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8261 + X86_64_REGPARM_MAX * 8);
8262 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8263 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8264 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8265 gimplify_and_add (t, pre_p);
8268 /* Compute index to start of area used for integer regs. */
8269 if (needed_intregs)
8271 /* int_addr = gpr + sav; */
8272 t = fold_convert (sizetype, gpr);
8273 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8274 gimplify_assign (int_addr, t, pre_p);
8276 if (needed_sseregs)
8278 /* sse_addr = fpr + sav; */
8279 t = fold_convert (sizetype, fpr);
8280 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8281 gimplify_assign (sse_addr, t, pre_p);
8283 if (need_temp)
8285 int i, prev_size = 0;
8286 tree temp = create_tmp_var (type, "va_arg_tmp");
8288 /* addr = &temp; */
8289 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8290 gimplify_assign (addr, t, pre_p);
8292 for (i = 0; i < XVECLEN (container, 0); i++)
8294 rtx slot = XVECEXP (container, 0, i);
8295 rtx reg = XEXP (slot, 0);
8296 enum machine_mode mode = GET_MODE (reg);
8297 tree piece_type;
8298 tree addr_type;
8299 tree daddr_type;
8300 tree src_addr, src;
8301 int src_offset;
8302 tree dest_addr, dest;
8303 int cur_size = GET_MODE_SIZE (mode);
8305 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8306 prev_size = INTVAL (XEXP (slot, 1));
8307 if (prev_size + cur_size > size)
8309 cur_size = size - prev_size;
8310 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8311 if (mode == BLKmode)
8312 mode = QImode;
8314 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8315 if (mode == GET_MODE (reg))
8316 addr_type = build_pointer_type (piece_type);
8317 else
8318 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8319 true);
8320 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8321 true);
8323 if (SSE_REGNO_P (REGNO (reg)))
8325 src_addr = sse_addr;
8326 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8328 else
8330 src_addr = int_addr;
8331 src_offset = REGNO (reg) * 8;
8333 src_addr = fold_convert (addr_type, src_addr);
8334 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
8335 size_int (src_offset));
8337 dest_addr = fold_convert (daddr_type, addr);
8338 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
8339 size_int (prev_size));
8340 if (cur_size == GET_MODE_SIZE (mode))
8342 src = build_va_arg_indirect_ref (src_addr);
8343 dest = build_va_arg_indirect_ref (dest_addr);
8345 gimplify_assign (dest, src, pre_p);
8347 else
8349 tree copy
8350 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
8351 3, dest_addr, src_addr,
8352 size_int (cur_size));
8353 gimplify_and_add (copy, pre_p);
8355 prev_size += cur_size;
8359 if (needed_intregs)
8361 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8362 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8363 gimplify_assign (gpr, t, pre_p);
8366 if (needed_sseregs)
8368 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8369 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8370 gimplify_assign (fpr, t, pre_p);
8373 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8375 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8378 /* ... otherwise out of the overflow area. */
8380 /* When we align a parameter on the stack for the caller, if its
8381 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8382 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
8383 with the caller. */
8384 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8385 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8386 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8388 /* Care for on-stack alignment if needed. */
8389 if (arg_boundary <= 64 || size == 0)
8390 t = ovf;
8391 else
8393 HOST_WIDE_INT align = arg_boundary / 8;
8394 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
8395 size_int (align - 1));
8396 t = fold_convert (sizetype, t);
8397 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8398 size_int (-align));
8399 t = fold_convert (TREE_TYPE (ovf), t);
8402 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8403 gimplify_assign (addr, t, pre_p);
8405 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
8406 size_int (rsize * UNITS_PER_WORD));
8407 gimplify_assign (unshare_expr (ovf), t, pre_p);
8409 if (container)
8410 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8412 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8413 addr = fold_convert (ptrtype, addr);
8415 if (indirect_p)
8416 addr = build_va_arg_indirect_ref (addr);
8417 return build_va_arg_indirect_ref (addr);
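/* A compressed C sketch, not the GIMPLE actually emitted, of the va_arg
   sequence produced above for a plain int; the struct mirrors the va_list
   record built earlier and the helper itself is purely illustrative:

     struct sketch_va_list
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     };

     static int
     sketch_va_arg_int (struct sketch_va_list *ap)
     {
       int *p;
       if (ap->gp_offset < 6 * 8)
         {
           p = (int *) ((char *) ap->reg_save_area + ap->gp_offset);
           ap->gp_offset += 8;
         }
       else
         {
           p = (int *) ap->overflow_arg_area;
           ap->overflow_arg_area = (char *) ap->overflow_arg_area + 8;
         }
       return *p;
     }

   SSE arguments test fp_offset against the end of the XMM save area
   instead, and arguments that no longer fit in registers take the
   lab_false path into the overflow area handled above.  */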
8420 /* Return true if OPNUM's MEM should be matched
8421 in movabs* patterns. */
8423 bool
8424 ix86_check_movabs (rtx insn, int opnum)
8426 rtx set, mem;
8428 set = PATTERN (insn);
8429 if (GET_CODE (set) == PARALLEL)
8430 set = XVECEXP (set, 0, 0);
8431 gcc_assert (GET_CODE (set) == SET);
8432 mem = XEXP (set, opnum);
8433 while (GET_CODE (mem) == SUBREG)
8434 mem = SUBREG_REG (mem);
8435 gcc_assert (MEM_P (mem));
8436 return volatile_ok || !MEM_VOLATILE_P (mem);
8439 /* Initialize the table of extra 80387 mathematical constants. */
8441 static void
8442 init_ext_80387_constants (void)
8444 static const char * cst[5] =
8446 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8447 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8448 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8449 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8450 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8452 int i;
8454 for (i = 0; i < 5; i++)
8456 real_from_string (&ext_80387_constants_table[i], cst[i]);
8457 /* Ensure each constant is rounded to XFmode precision. */
8458 real_convert (&ext_80387_constants_table[i],
8459 XFmode, &ext_80387_constants_table[i]);
8462 ext_80387_constants_init = 1;
8465 /* Return non-zero if the constant is something that
8466 can be loaded with a special instruction. */
8468 int
8469 standard_80387_constant_p (rtx x)
8471 enum machine_mode mode = GET_MODE (x);
8473 REAL_VALUE_TYPE r;
8475 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8476 return -1;
8478 if (x == CONST0_RTX (mode))
8479 return 1;
8480 if (x == CONST1_RTX (mode))
8481 return 2;
8483 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8485 /* For XFmode constants, try to find a special 80387 instruction when
8486 optimizing for size or on those CPUs that benefit from them. */
8487 if (mode == XFmode
8488 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8490 int i;
8492 if (! ext_80387_constants_init)
8493 init_ext_80387_constants ();
8495 for (i = 0; i < 5; i++)
8496 if (real_identical (&r, &ext_80387_constants_table[i]))
8497 return i + 3;
8500 /* Load of the constant -0.0 or -1.0 will be split as
8501 fldz;fchs or fld1;fchs sequence. */
8502 if (real_isnegzero (&r))
8503 return 8;
8504 if (real_identical (&r, &dconstm1))
8505 return 9;
8507 return 0;
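/* Illustrative only: the classification above is what lets trivial x87
   loads become single instructions, e.g.

     long double ld_zero (void) { return 0.0L; }
     long double ld_one  (void) { return 1.0L; }

   typically materialise their results with fldz and fld1, and with -Os (or
   when TARGET_EXT_80387_CONSTANTS holds) an XFmode pi constant can be
   loaded with fldpi instead of going through the constant pool.  Whether a
   particular compilation really emits these depends on the rest of the
   backend, so this is a sketch of intent, not a guarantee.  */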
8510 /* Return the opcode of the special instruction to be used to load
8511 the constant X. */
8513 const char *
8514 standard_80387_constant_opcode (rtx x)
8516 switch (standard_80387_constant_p (x))
8518 case 1:
8519 return "fldz";
8520 case 2:
8521 return "fld1";
8522 case 3:
8523 return "fldlg2";
8524 case 4:
8525 return "fldln2";
8526 case 5:
8527 return "fldl2e";
8528 case 6:
8529 return "fldl2t";
8530 case 7:
8531 return "fldpi";
8532 case 8:
8533 case 9:
8534 return "#";
8535 default:
8536 gcc_unreachable ();
8540 /* Return the CONST_DOUBLE representing the 80387 constant that is
8541 loaded by the specified special instruction. The argument IDX
8542 matches the return value from standard_80387_constant_p. */
8544 rtx
8545 standard_80387_constant_rtx (int idx)
8547 int i;
8549 if (! ext_80387_constants_init)
8550 init_ext_80387_constants ();
8552 switch (idx)
8554 case 3:
8555 case 4:
8556 case 5:
8557 case 6:
8558 case 7:
8559 i = idx - 3;
8560 break;
8562 default:
8563 gcc_unreachable ();
8566 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8567 XFmode);
8570 /* Return 1 if X is all 0s and 2 if X is all 1s
8571 in supported SSE vector mode. */
8573 int
8574 standard_sse_constant_p (rtx x)
8576 enum machine_mode mode = GET_MODE (x);
8578 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8579 return 1;
8580 if (vector_all_ones_operand (x, mode))
8581 switch (mode)
8583 case V16QImode:
8584 case V8HImode:
8585 case V4SImode:
8586 case V2DImode:
8587 if (TARGET_SSE2)
8588 return 2;
8589 default:
8590 break;
8593 return 0;
8596 /* Return the opcode of the special instruction to be used to load
8597 the constant X. */
8599 const char *
8600 standard_sse_constant_opcode (rtx insn, rtx x)
8602 switch (standard_sse_constant_p (x))
8604 case 1:
8605 switch (get_attr_mode (insn))
8607 case MODE_V4SF:
8608 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8609 case MODE_V2DF:
8610 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8611 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8612 else
8613 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
8614 case MODE_TI:
8615 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8616 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8617 else
8618 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
8619 case MODE_V8SF:
8620 return "vxorps\t%x0, %x0, %x0";
8621 case MODE_V4DF:
8622 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8623 return "vxorps\t%x0, %x0, %x0";
8624 else
8625 return "vxorpd\t%x0, %x0, %x0";
8626 case MODE_OI:
8627 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8628 return "vxorps\t%x0, %x0, %x0";
8629 else
8630 return "vpxor\t%x0, %x0, %x0";
8631 default:
8632 break;
8634 case 2:
8635 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
8636 default:
8637 break;
8639 gcc_unreachable ();
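/* An illustrative pairing, not a promise about the exact instruction mix:

     #include <emmintrin.h>

     __m128i sketch_zero (void) { return _mm_setzero_si128 (); }
     __m128i sketch_ones (void) { return _mm_set1_epi32 (-1); }

   the all-zero constant is typically materialised with pxor/xorps (vxorps
   under AVX) and the all-ones constant with pcmpeqd, matching the two
   cases handled above.  */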
8642 /* Return true if OP contains a symbol reference. */
8644 bool
8645 symbolic_reference_mentioned_p (rtx op)
8647 const char *fmt;
8648 int i;
8650 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8651 return true;
8653 fmt = GET_RTX_FORMAT (GET_CODE (op));
8654 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8656 if (fmt[i] == 'E')
8658 int j;
8660 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8661 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8662 return true;
8665 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8666 return true;
8669 return false;
8672 /* Return true if it is appropriate to emit `ret' instructions in the
8673 body of a function. Do this only if the epilogue is simple, needing a
8674 couple of insns. Prior to reloading, we can't tell how many registers
8675 must be saved, so return false then. Return false if there is no frame
8676 marker to de-allocate. */
8678 bool
8679 ix86_can_use_return_insn_p (void)
8681 struct ix86_frame frame;
8683 if (! reload_completed || frame_pointer_needed)
8684 return 0;
8686 /* Don't allow more than 32k pop, since that's all we can do
8687 with one instruction. */
8688 if (crtl->args.pops_args && crtl->args.size >= 32768)
8689 return 0;
8691 ix86_compute_frame_layout (&frame);
8692 return (frame.stack_pointer_offset == UNITS_PER_WORD
8693 && (frame.nregs + frame.nsseregs) == 0);
8696 /* Value should be nonzero if functions must have frame pointers.
8697 Zero means the frame pointer need not be set up (and parms may
8698 be accessed via the stack pointer) in functions that seem suitable. */
8700 static bool
8701 ix86_frame_pointer_required (void)
8703 /* If we accessed previous frames, then the generated code expects
8704 to be able to access the saved ebp value in our frame. */
8705 if (cfun->machine->accesses_prev_frame)
8706 return true;
8708 /* Several x86 OSes need a frame pointer for other reasons,
8709 usually pertaining to setjmp. */
8710 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8711 return true;
8713 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8714 turns off the frame pointer by default. Turn it back on now if
8715 we've not got a leaf function. */
8716 if (TARGET_OMIT_LEAF_FRAME_POINTER
8717 && (!current_function_is_leaf
8718 || ix86_current_function_calls_tls_descriptor))
8719 return true;
8721 if (crtl->profile && !flag_fentry)
8722 return true;
8724 return false;
8727 /* Record that the current function accesses previous call frames. */
8729 void
8730 ix86_setup_frame_addresses (void)
8732 cfun->machine->accesses_prev_frame = 1;
8735 #ifndef USE_HIDDEN_LINKONCE
8736 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
8737 # define USE_HIDDEN_LINKONCE 1
8738 # else
8739 # define USE_HIDDEN_LINKONCE 0
8740 # endif
8741 #endif
8743 static int pic_labels_used;
8745 /* Fills in the label name that should be used for a pc thunk for
8746 the given register. */
8748 static void
8749 get_pc_thunk_name (char name[32], unsigned int regno)
8751 gcc_assert (!TARGET_64BIT);
8753 if (USE_HIDDEN_LINKONCE)
8754 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8755 else
8756 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8760 /* This function generates code for -fpic that loads %ebx with
8761 the return address of the caller and then returns. */
8763 static void
8764 ix86_code_end (void)
8766 rtx xops[2];
8767 int regno;
8769 for (regno = AX_REG; regno <= SP_REG; regno++)
8771 char name[32];
8772 tree decl;
8774 if (!(pic_labels_used & (1 << regno)))
8775 continue;
8777 get_pc_thunk_name (name, regno);
8779 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8780 get_identifier (name),
8781 build_function_type (void_type_node, void_list_node));
8782 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8783 NULL_TREE, void_type_node);
8784 TREE_PUBLIC (decl) = 1;
8785 TREE_STATIC (decl) = 1;
8787 #if TARGET_MACHO
8788 if (TARGET_MACHO)
8790 switch_to_section (darwin_sections[text_coal_section]);
8791 fputs ("\t.weak_definition\t", asm_out_file);
8792 assemble_name (asm_out_file, name);
8793 fputs ("\n\t.private_extern\t", asm_out_file);
8794 assemble_name (asm_out_file, name);
8795 putc ('\n', asm_out_file);
8796 ASM_OUTPUT_LABEL (asm_out_file, name);
8797 DECL_WEAK (decl) = 1;
8799 else
8800 #endif
8801 if (USE_HIDDEN_LINKONCE)
8803 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8805 targetm.asm_out.unique_section (decl, 0);
8806 switch_to_section (get_named_section (decl, NULL, 0));
8808 targetm.asm_out.globalize_label (asm_out_file, name);
8809 fputs ("\t.hidden\t", asm_out_file);
8810 assemble_name (asm_out_file, name);
8811 putc ('\n', asm_out_file);
8812 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8814 else
8816 switch_to_section (text_section);
8817 ASM_OUTPUT_LABEL (asm_out_file, name);
8820 DECL_INITIAL (decl) = make_node (BLOCK);
8821 current_function_decl = decl;
8822 init_function_start (decl);
8823 first_function_block_is_cold = false;
8824 /* Make sure unwind info is emitted for the thunk if needed. */
8825 final_start_function (emit_barrier (), asm_out_file, 1);
8827 /* Pad stack IP move with 4 instructions (two NOPs count
8828 as one instruction). */
8829 if (TARGET_PAD_SHORT_FUNCTION)
8831 int i = 8;
8833 while (i--)
8834 fputs ("\tnop\n", asm_out_file);
8837 xops[0] = gen_rtx_REG (Pmode, regno);
8838 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8839 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8840 fputs ("\tret\n", asm_out_file);
8841 final_end_function ();
8842 init_insn_lengths ();
8843 free_after_compilation (cfun);
8844 set_cfun (NULL);
8845 current_function_decl = NULL;
8848 if (flag_split_stack)
8849 file_end_indicate_split_stack ();
8852 /* Emit code for the SET_GOT patterns. */
8854 const char *
8855 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8857 rtx xops[3];
8859 xops[0] = dest;
8861 if (TARGET_VXWORKS_RTP && flag_pic)
8863 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8864 xops[2] = gen_rtx_MEM (Pmode,
8865 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8866 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8868 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8869 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8870 an unadorned address. */
8871 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8872 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8873 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8874 return "";
8877 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8879 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8881 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8883 if (!flag_pic)
8884 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8885 else
8887 output_asm_insn ("call\t%a2", xops);
8888 #ifdef DWARF2_UNWIND_INFO
8889 /* The call to next label acts as a push. */
8890 if (dwarf2out_do_frame ())
8892 rtx insn;
8893 start_sequence ();
8894 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8895 gen_rtx_PLUS (Pmode,
8896 stack_pointer_rtx,
8897 GEN_INT (-4))));
8898 RTX_FRAME_RELATED_P (insn) = 1;
8899 dwarf2out_frame_debug (insn, true);
8900 end_sequence ();
8902 #endif
8905 #if TARGET_MACHO
8906 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8907 is what will be referenced by the Mach-O PIC subsystem. */
8908 if (!label)
8909 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8910 #endif
8912 targetm.asm_out.internal_label (asm_out_file, "L",
8913 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8915 if (flag_pic)
8917 output_asm_insn ("pop%z0\t%0", xops);
8918 #ifdef DWARF2_UNWIND_INFO
8919 /* The pop is a pop and clobbers dest, but doesn't restore it
8920 for unwind info purposes. */
8921 if (dwarf2out_do_frame ())
8923 rtx insn;
8924 start_sequence ();
8925 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8926 dwarf2out_frame_debug (insn, true);
8927 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8928 gen_rtx_PLUS (Pmode,
8929 stack_pointer_rtx,
8930 GEN_INT (4))));
8931 RTX_FRAME_RELATED_P (insn) = 1;
8932 dwarf2out_frame_debug (insn, true);
8933 end_sequence ();
8935 #endif
8938 else
8940 char name[32];
8941 get_pc_thunk_name (name, REGNO (dest));
8942 pic_labels_used |= 1 << REGNO (dest);
8944 #ifdef DWARF2_UNWIND_INFO
8945 /* Ensure all queued register saves are flushed before the
8946 call. */
8947 if (dwarf2out_do_frame ())
8948 dwarf2out_flush_queued_reg_saves ();
8949 #endif
8950 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8951 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8952 output_asm_insn ("call\t%X2", xops);
8953 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8954 is what will be referenced by the Mach-O PIC subsystem. */
8955 #if TARGET_MACHO
8956 if (!label)
8957 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8958 else
8959 targetm.asm_out.internal_label (asm_out_file, "L",
8960 CODE_LABEL_NUMBER (label));
8961 #endif
8964 if (TARGET_MACHO)
8965 return "";
8967 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8968 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8969 else
8970 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
8972 return "";
8975 /* Generate a "push" pattern for input ARG. */
8977 static rtx
8978 gen_push (rtx arg)
8980 struct machine_function *m = cfun->machine;
8982 if (m->fs.cfa_reg == stack_pointer_rtx)
8983 m->fs.cfa_offset += UNITS_PER_WORD;
8984 m->fs.sp_offset += UNITS_PER_WORD;
8986 return gen_rtx_SET (VOIDmode,
8987 gen_rtx_MEM (Pmode,
8988 gen_rtx_PRE_DEC (Pmode,
8989 stack_pointer_rtx)),
8990 arg);
8993 /* Generate a "pop" pattern for input ARG. */
8995 static rtx
8996 gen_pop (rtx arg)
8998 return gen_rtx_SET (VOIDmode,
8999 arg,
9000 gen_rtx_MEM (Pmode,
9001 gen_rtx_POST_INC (Pmode,
9002 stack_pointer_rtx)));
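/* Roughly, the RTL shapes these two helpers build are

     (set (mem (pre_dec sp)) (reg ...))     for gen_push
     (set (reg ...) (mem (post_inc sp)))    for gen_pop

   in Pmode; gen_push additionally bumps the recorded sp offset (and the
   CFA offset while the CFA is still the stack pointer) so the frame state
   tracking stays accurate.  */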
9005 /* Return >= 0 if there is an unused call-clobbered register available
9006 for the entire function. */
9008 static unsigned int
9009 ix86_select_alt_pic_regnum (void)
9011 if (current_function_is_leaf
9012 && !crtl->profile
9013 && !ix86_current_function_calls_tls_descriptor)
9015 int i, drap;
9016 /* Can't use the same register for both PIC and DRAP. */
9017 if (crtl->drap_reg)
9018 drap = REGNO (crtl->drap_reg);
9019 else
9020 drap = -1;
9021 for (i = 2; i >= 0; --i)
9022 if (i != drap && !df_regs_ever_live_p (i))
9023 return i;
9026 return INVALID_REGNUM;
9029 /* Return 1 if we need to save REGNO. */
9030 static int
9031 ix86_save_reg (unsigned int regno, int maybe_eh_return)
9033 if (pic_offset_table_rtx
9034 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9035 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9036 || crtl->profile
9037 || crtl->calls_eh_return
9038 || crtl->uses_const_pool))
9040 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
9041 return 0;
9042 return 1;
9045 if (crtl->calls_eh_return && maybe_eh_return)
9047 unsigned i;
9048 for (i = 0; ; i++)
9050 unsigned test = EH_RETURN_DATA_REGNO (i);
9051 if (test == INVALID_REGNUM)
9052 break;
9053 if (test == regno)
9054 return 1;
9058 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9059 return 1;
9061 return (df_regs_ever_live_p (regno)
9062 && !call_used_regs[regno]
9063 && !fixed_regs[regno]
9064 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9067 /* Return the number of saved general purpose registers. */
9069 static int
9070 ix86_nsaved_regs (void)
9072 int nregs = 0;
9073 int regno;
9075 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9076 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9077 nregs ++;
9078 return nregs;
9081 /* Return the number of saved SSE registers. */
9083 static int
9084 ix86_nsaved_sseregs (void)
9086 int nregs = 0;
9087 int regno;
9089 if (ix86_cfun_abi () != MS_ABI)
9090 return 0;
9091 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9092 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9093 nregs ++;
9094 return nregs;
9097 /* Given FROM and TO register numbers, say whether this elimination is
9098 allowed. If stack alignment is needed, we can only replace argument
9099 pointer with hard frame pointer, or replace frame pointer with stack
9100 pointer. Otherwise, frame pointer elimination is automatically
9101 handled and all other eliminations are valid. */
9103 static bool
9104 ix86_can_eliminate (const int from, const int to)
9106 if (stack_realign_fp)
9107 return ((from == ARG_POINTER_REGNUM
9108 && to == HARD_FRAME_POINTER_REGNUM)
9109 || (from == FRAME_POINTER_REGNUM
9110 && to == STACK_POINTER_REGNUM));
9111 else
9112 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9115 /* Return the offset between two registers, one to be eliminated, and the other
9116 its replacement, at the start of a routine. */
9118 HOST_WIDE_INT
9119 ix86_initial_elimination_offset (int from, int to)
9121 struct ix86_frame frame;
9122 ix86_compute_frame_layout (&frame);
9124 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9125 return frame.hard_frame_pointer_offset;
9126 else if (from == FRAME_POINTER_REGNUM
9127 && to == HARD_FRAME_POINTER_REGNUM)
9128 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9129 else
9131 gcc_assert (to == STACK_POINTER_REGNUM);
9133 if (from == ARG_POINTER_REGNUM)
9134 return frame.stack_pointer_offset;
9136 gcc_assert (from == FRAME_POINTER_REGNUM);
9137 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9141 /* In a dynamically-aligned function, we can't know the offset from
9142 stack pointer to frame pointer, so we must ensure that setjmp
9143 eliminates fp against the hard fp (%ebp) rather than trying to
9144 index from %esp up to the top of the frame across a gap that is
9145 of unknown (at compile-time) size. */
9146 static rtx
9147 ix86_builtin_setjmp_frame_value (void)
9149 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9152 /* On the x86 -fsplit-stack and -fstack-protector both use the same
9153 field in the TCB, so they cannot be used together. */
9155 static bool
9156 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED,
9157 struct gcc_options *opts ATTRIBUTE_UNUSED)
9159 bool ret = true;
9161 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
9162 if (report)
9163 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
9164 ret = false;
9165 #else
9166 if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
9168 if (report)
9169 error ("%<-fsplit-stack%> requires "
9170 "assembler support for CFI directives");
9171 ret = false;
9173 #endif
9175 return ret;
9178 /* When using -fsplit-stack, the allocation routines set a field in
9179 the TCB to the bottom of the stack plus this much space, measured
9180 in bytes. */
9182 #define SPLIT_STACK_AVAILABLE 256
9184 /* Fill structure ix86_frame about frame of currently computed function. */
9186 static void
9187 ix86_compute_frame_layout (struct ix86_frame *frame)
9189 unsigned int stack_alignment_needed;
9190 HOST_WIDE_INT offset;
9191 unsigned int preferred_alignment;
9192 HOST_WIDE_INT size = get_frame_size ();
9193 HOST_WIDE_INT to_allocate;
9195 frame->nregs = ix86_nsaved_regs ();
9196 frame->nsseregs = ix86_nsaved_sseregs ();
9198 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9199 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9201 /* The MS ABI seems to require the stack alignment to always be 16, except
9202 for function prologues and leaf functions. */
9203 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
9204 && (!current_function_is_leaf || cfun->calls_alloca != 0
9205 || ix86_current_function_calls_tls_descriptor))
9207 preferred_alignment = 16;
9208 stack_alignment_needed = 16;
9209 crtl->preferred_stack_boundary = 128;
9210 crtl->stack_alignment_needed = 128;
9213 gcc_assert (!size || stack_alignment_needed);
9214 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9215 gcc_assert (preferred_alignment <= stack_alignment_needed);
9217 /* For SEH we have to limit the amount of code movement into the prologue.
9218 At present we do this via a BLOCKAGE, at which point there's very little
9219 scheduling that can be done, which means that there's very little point
9220 in doing anything except PUSHs. */
9221 if (TARGET_SEH)
9222 cfun->machine->use_fast_prologue_epilogue = false;
9224 /* During the reload iteration the number of registers saved can change.
9225 Recompute the value as needed. Do not recompute when the number of registers
9226 didn't change, as reload makes multiple calls to this function and does not
9227 expect the decision to change within a single iteration. */
9228 else if (!optimize_function_for_size_p (cfun)
9229 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9231 int count = frame->nregs;
9232 struct cgraph_node *node = cgraph_node (current_function_decl);
9234 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9236 /* The fast prologue uses move instead of push to save registers. This
9237 is significantly longer, but also executes faster as modern hardware
9238 can execute the moves in parallel, but can't do that for push/pop.
9240 Be careful about choosing which prologue to emit: when the function takes
9241 many instructions to execute we may use the slow version, as well as when
9242 the function is known to be outside a hot spot (this is known with
9243 feedback only). Weight the size of the function by the number of registers
9244 to save, as it is cheap to use one or two push instructions but very
9245 slow to use many of them. */
9246 if (count)
9247 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9248 if (node->frequency < NODE_FREQUENCY_NORMAL
9249 || (flag_branch_probabilities
9250 && node->frequency < NODE_FREQUENCY_HOT))
9251 cfun->machine->use_fast_prologue_epilogue = false;
9252 else
9253 cfun->machine->use_fast_prologue_epilogue
9254 = !expensive_function_p (count);
9256 if (TARGET_PROLOGUE_USING_MOVE
9257 && cfun->machine->use_fast_prologue_epilogue)
9258 frame->save_regs_using_mov = true;
9259 else
9260 frame->save_regs_using_mov = false;
9262 /* If static stack checking is enabled and done with probes, the registers
9263 need to be saved before allocating the frame. */
9264 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9265 frame->save_regs_using_mov = false;
9267 /* Skip return address. */
9268 offset = UNITS_PER_WORD;
9270 /* Skip pushed static chain. */
9271 if (ix86_static_chain_on_stack)
9272 offset += UNITS_PER_WORD;
9274 /* Skip saved base pointer. */
9275 if (frame_pointer_needed)
9276 offset += UNITS_PER_WORD;
9277 frame->hfp_save_offset = offset;
9279 /* The traditional frame pointer location is at the top of the frame. */
9280 frame->hard_frame_pointer_offset = offset;
9282 /* Register save area */
9283 offset += frame->nregs * UNITS_PER_WORD;
9284 frame->reg_save_offset = offset;
9286 /* Align and set SSE register save area. */
9287 if (frame->nsseregs)
9289 /* The only ABI that has saved SSE registers (Win64) also has a
9290 16-byte aligned default stack, and thus we don't need to be
9291 within the re-aligned local stack frame to save them. */
9292 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9293 offset = (offset + 16 - 1) & -16;
9294 offset += frame->nsseregs * 16;
9296 frame->sse_reg_save_offset = offset;
9298 /* The re-aligned stack starts here. Values before this point are not
9299 directly comparable with values below this point. In order to make
9300 sure that no value happens to be the same before and after, force
9301 the alignment computation below to add a non-zero value. */
9302 if (stack_realign_fp)
9303 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9305 /* Va-arg area */
9306 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9307 offset += frame->va_arg_size;
9309 /* Align start of frame for local function. */
9310 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9312 /* Frame pointer points here. */
9313 frame->frame_pointer_offset = offset;
9315 offset += size;
9317 /* Add outgoing arguments area. Can be skipped if we eliminated
9318 all the function calls as dead code.
9319 Skipping is however impossible when the function calls alloca; the alloca
9320 expander assumes that the last crtl->outgoing_args_size bytes
9321 of the stack frame are unused. */
9322 if (ACCUMULATE_OUTGOING_ARGS
9323 && (!current_function_is_leaf || cfun->calls_alloca
9324 || ix86_current_function_calls_tls_descriptor))
9326 offset += crtl->outgoing_args_size;
9327 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9329 else
9330 frame->outgoing_arguments_size = 0;
9332 /* Align stack boundary. Only needed if we're calling another function
9333 or using alloca. */
9334 if (!current_function_is_leaf || cfun->calls_alloca
9335 || ix86_current_function_calls_tls_descriptor)
9336 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9338 /* We've reached end of stack frame. */
9339 frame->stack_pointer_offset = offset;
9341 /* Size prologue needs to allocate. */
9342 to_allocate = offset - frame->sse_reg_save_offset;
9344 if ((!to_allocate && frame->nregs <= 1)
9345 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9346 frame->save_regs_using_mov = false;
9348 if (ix86_using_red_zone ()
9349 && current_function_sp_is_unchanging
9350 && current_function_is_leaf
9351 && !ix86_current_function_calls_tls_descriptor)
9353 frame->red_zone_size = to_allocate;
9354 if (frame->save_regs_using_mov)
9355 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9356 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9357 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9359 else
9360 frame->red_zone_size = 0;
9361 frame->stack_pointer_offset -= frame->red_zone_size;
9363 /* The SEH frame pointer location is near the bottom of the frame.
9364 This is enforced by the fact that the difference between the
9365 stack pointer and the frame pointer is limited to 240 bytes in
9366 the unwind data structure. */
9367 if (TARGET_SEH)
9369 HOST_WIDE_INT diff;
9371 /* If we can leave the frame pointer where it is, do so. */
9372 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9373 if (diff > 240 || (diff & 15) != 0)
9375 /* Ideally we'd determine what portion of the local stack frame
9376 (within the constraint of the lowest 240) is most heavily used.
9377 But without that complication, simply bias the frame pointer
9378 by 128 bytes so as to maximize the amount of the local stack
9379 frame that is addressable with 8-bit offsets. */
9380 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
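/* Illustrative sketch, not part of GCC: the rounding idiom used repeatedly
   in the layout code above.  For a power-of-two ALIGN, adding ALIGN - 1
   and masking with -ALIGN rounds OFFSET up to the next multiple of ALIGN,
   e.g. round_up_sketch (13, 16) == 16 and round_up_sketch (32, 16) == 32.  */

static HOST_WIDE_INT
round_up_sketch (HOST_WIDE_INT offset, HOST_WIDE_INT align)
{
  return (offset + align - 1) & -align;
}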
9385 /* This is semi-inlined memory_address_length, but simplified
9386 since we know that we're always dealing with reg+offset, and
9387 to avoid having to create and discard all that rtl. */
9389 static inline int
9390 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9392 int len = 4;
9394 if (offset == 0)
9396 /* EBP and R13 cannot be encoded without an offset. */
9397 len = (regno == BP_REG || regno == R13_REG);
9399 else if (IN_RANGE (offset, -128, 127))
9400 len = 1;
9402 /* ESP and R12 must be encoded with a SIB byte. */
9403 if (regno == SP_REG || regno == R12_REG)
9404 len++;
9406 return len;
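/* Illustrative note, not part of GCC: a few worked values from the
   heuristic above, assuming the usual modrm/SIB encoding costs.  A zero
   offset needs no displacement unless the base is EBP/R13, which always
   take a disp8; offsets in [-128, 127] take a disp8 and anything larger
   a disp32; ESP/R12 always add a SIB byte on top:
   choose_baseaddr_len (AX_REG, 0) == 0
   choose_baseaddr_len (BP_REG, 0) == 1
   choose_baseaddr_len (SP_REG, 100) == 2 (disp8 + SIB)
   choose_baseaddr_len (AX_REG, 4096) == 4 (disp32)  */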
9409 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9410 The valid base registers are taken from CFUN->MACHINE->FS. */
9412 static rtx
9413 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9415 const struct machine_function *m = cfun->machine;
9416 rtx base_reg = NULL;
9417 HOST_WIDE_INT base_offset = 0;
9419 if (m->use_fast_prologue_epilogue)
9421 /* Choose the base register most likely to allow the most scheduling
9422 opportunities. Generally FP is valid throughout the function,
9423 while DRAP must be reloaded within the epilogue. But choose either
9424 over the SP due to increased encoding size. */
9426 if (m->fs.fp_valid)
9428 base_reg = hard_frame_pointer_rtx;
9429 base_offset = m->fs.fp_offset - cfa_offset;
9431 else if (m->fs.drap_valid)
9433 base_reg = crtl->drap_reg;
9434 base_offset = 0 - cfa_offset;
9436 else if (m->fs.sp_valid)
9438 base_reg = stack_pointer_rtx;
9439 base_offset = m->fs.sp_offset - cfa_offset;
9442 else
9444 HOST_WIDE_INT toffset;
9445 int len = 16, tlen;
9447 /* Choose the base register with the smallest address encoding.
9448 With a tie, choose FP > DRAP > SP. */
9449 if (m->fs.sp_valid)
9451 base_reg = stack_pointer_rtx;
9452 base_offset = m->fs.sp_offset - cfa_offset;
9453 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9455 if (m->fs.drap_valid)
9457 toffset = 0 - cfa_offset;
9458 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9459 if (tlen <= len)
9461 base_reg = crtl->drap_reg;
9462 base_offset = toffset;
9463 len = tlen;
9466 if (m->fs.fp_valid)
9468 toffset = m->fs.fp_offset - cfa_offset;
9469 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9470 if (tlen <= len)
9472 base_reg = hard_frame_pointer_rtx;
9473 base_offset = toffset;
9474 len = tlen;
9478 gcc_assert (base_reg != NULL);
9480 return plus_constant (base_reg, base_offset);
9483 /* Emit code to save registers in the prologue. */
9485 static void
9486 ix86_emit_save_regs (void)
9488 unsigned int regno;
9489 rtx insn;
9491 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9492 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9494 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9495 RTX_FRAME_RELATED_P (insn) = 1;
9499 /* Emit a single register save at CFA - CFA_OFFSET. */
9501 static void
9502 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9503 HOST_WIDE_INT cfa_offset)
9505 struct machine_function *m = cfun->machine;
9506 rtx reg = gen_rtx_REG (mode, regno);
9507 rtx mem, addr, base, insn;
9509 addr = choose_baseaddr (cfa_offset);
9510 mem = gen_frame_mem (mode, addr);
9512 /* For SSE saves, we need to indicate the 128-bit alignment. */
9513 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9515 insn = emit_move_insn (mem, reg);
9516 RTX_FRAME_RELATED_P (insn) = 1;
9518 base = addr;
9519 if (GET_CODE (base) == PLUS)
9520 base = XEXP (base, 0);
9521 gcc_checking_assert (REG_P (base));
9523 /* When saving registers into a re-aligned local stack frame, avoid
9524 any tricky guessing by dwarf2out. */
9525 if (m->fs.realigned)
9527 gcc_checking_assert (stack_realign_drap);
9529 if (regno == REGNO (crtl->drap_reg))
9531 /* A bit of a hack. We force the DRAP register to be saved in
9532 the re-aligned stack frame, which provides us with a copy
9533 of the CFA that will last past the prologue. Install it. */
9534 gcc_checking_assert (cfun->machine->fs.fp_valid);
9535 addr = plus_constant (hard_frame_pointer_rtx,
9536 cfun->machine->fs.fp_offset - cfa_offset);
9537 mem = gen_rtx_MEM (mode, addr);
9538 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9540 else
9542 /* The frame pointer is a stable reference within the
9543 aligned frame. Use it. */
9544 gcc_checking_assert (cfun->machine->fs.fp_valid);
9545 addr = plus_constant (hard_frame_pointer_rtx,
9546 cfun->machine->fs.fp_offset - cfa_offset);
9547 mem = gen_rtx_MEM (mode, addr);
9548 add_reg_note (insn, REG_CFA_EXPRESSION,
9549 gen_rtx_SET (VOIDmode, mem, reg));
9553 /* The memory may not be relative to the current CFA register,
9554 which means that we may need to generate a new pattern for
9555 use by the unwind info. */
9556 else if (base != m->fs.cfa_reg)
9558 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9559 mem = gen_rtx_MEM (mode, addr);
9560 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9564 /* Emit code to save registers using MOV insns.
9565 First register is stored at CFA - CFA_OFFSET. */
9566 static void
9567 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9569 unsigned int regno;
9571 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9572 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9574 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9575 cfa_offset -= UNITS_PER_WORD;
9579 /* Emit code to save SSE registers using MOV insns.
9580 First register is stored at CFA - CFA_OFFSET. */
9581 static void
9582 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9584 unsigned int regno;
9586 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9587 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9589 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9590 cfa_offset -= 16;
9594 static GTY(()) rtx queued_cfa_restores;
9596 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9597 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9598 Don't add the note if the previously saved value will be left untouched
9599 within the stack red zone until return, as unwinders can find the same value
9600 in the register and on the stack. */
9602 static void
9603 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9605 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9606 return;
9608 if (insn)
9610 add_reg_note (insn, REG_CFA_RESTORE, reg);
9611 RTX_FRAME_RELATED_P (insn) = 1;
9613 else
9614 queued_cfa_restores
9615 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9618 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9620 static void
9621 ix86_add_queued_cfa_restore_notes (rtx insn)
9623 rtx last;
9624 if (!queued_cfa_restores)
9625 return;
9626 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9628 XEXP (last, 1) = REG_NOTES (insn);
9629 REG_NOTES (insn) = queued_cfa_restores;
9630 queued_cfa_restores = NULL_RTX;
9631 RTX_FRAME_RELATED_P (insn) = 1;
9634 /* Expand prologue or epilogue stack adjustment.
9635 The pattern exists to put a dependency on all ebp-based memory accesses.
9636 STYLE should be negative if instructions should be marked as frame related,
9637 zero if %r11 register is live and cannot be freely used and positive
9638 otherwise. */
9640 static void
9641 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9642 int style, bool set_cfa)
9644 struct machine_function *m = cfun->machine;
9645 rtx insn;
9646 bool add_frame_related_expr = false;
9648 if (! TARGET_64BIT)
9649 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9650 else if (x86_64_immediate_operand (offset, DImode))
9651 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9652 else
9654 rtx tmp;
9655 /* r11 is used by indirect sibcall return as well, set before the
9656 epilogue and used after the epilogue. */
9657 if (style)
9658 tmp = gen_rtx_REG (DImode, R11_REG);
9659 else
9661 gcc_assert (src != hard_frame_pointer_rtx
9662 && dest != hard_frame_pointer_rtx);
9663 tmp = hard_frame_pointer_rtx;
9665 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9666 if (style < 0)
9667 add_frame_related_expr = true;
9669 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9672 insn = emit_insn (insn);
9673 if (style >= 0)
9674 ix86_add_queued_cfa_restore_notes (insn);
9676 if (set_cfa)
9678 rtx r;
9680 gcc_assert (m->fs.cfa_reg == src);
9681 m->fs.cfa_offset += INTVAL (offset);
9682 m->fs.cfa_reg = dest;
9684 r = gen_rtx_PLUS (Pmode, src, offset);
9685 r = gen_rtx_SET (VOIDmode, dest, r);
9686 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9687 RTX_FRAME_RELATED_P (insn) = 1;
9689 else if (style < 0)
9691 RTX_FRAME_RELATED_P (insn) = 1;
9692 if (add_frame_related_expr)
9694 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9695 r = gen_rtx_SET (VOIDmode, dest, r);
9696 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9700 if (dest == stack_pointer_rtx)
9702 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9703 bool valid = m->fs.sp_valid;
9705 if (src == hard_frame_pointer_rtx)
9707 valid = m->fs.fp_valid;
9708 ooffset = m->fs.fp_offset;
9710 else if (src == crtl->drap_reg)
9712 valid = m->fs.drap_valid;
9713 ooffset = 0;
9715 else
9717 /* Else there are two possibilities: SP itself, which we set
9718 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9719 taken care of by hand along the eh_return path. */
9720 gcc_checking_assert (src == stack_pointer_rtx
9721 || offset == const0_rtx);
9724 m->fs.sp_offset = ooffset - INTVAL (offset);
9725 m->fs.sp_valid = valid;
9729 /* Find an available register to be used as dynamic realign argument
9730 pointer register. Such a register will be written in the prologue and
9731 used at the beginning of the body, so it must not be
9732 1. parameter passing register.
9733 2. GOT pointer.
9734 We reuse static-chain register if it is available. Otherwise, we
9735 use DI for i386 and R13 for x86-64. We chose R13 since it has
9736 shorter encoding.
9738 Return: the regno of chosen register. */
9740 static unsigned int
9741 find_drap_reg (void)
9743 tree decl = cfun->decl;
9745 if (TARGET_64BIT)
9747 /* Use R13 for a nested function or a function that needs a static chain.
9748 Since a function with a tail call may use any caller-saved
9749 register in the epilogue, DRAP must not use a caller-saved
9750 register in that case. */
9751 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9752 return R13_REG;
9754 return R10_REG;
9756 else
9758 /* Use DI for a nested function or a function that needs a static chain.
9759 Since a function with a tail call may use any caller-saved
9760 register in the epilogue, DRAP must not use a caller-saved
9761 register in that case. */
9762 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9763 return DI_REG;
9765 /* Reuse static chain register if it isn't used for parameter
9766 passing. */
9767 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
9768 && !lookup_attribute ("fastcall",
9769 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
9770 && !lookup_attribute ("thiscall",
9771 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
9772 return CX_REG;
9773 else
9774 return DI_REG;
9778 /* Return minimum incoming stack alignment. */
9780 static unsigned int
9781 ix86_minimum_incoming_stack_boundary (bool sibcall)
9783 unsigned int incoming_stack_boundary;
9785 /* Prefer the one specified at command line. */
9786 if (ix86_user_incoming_stack_boundary)
9787 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9788 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9789 when -mstackrealign is used, this is not a sibcall check, and the
9790 estimated stack alignment is 128 bits. */
9791 else if (!sibcall
9792 && !TARGET_64BIT
9793 && ix86_force_align_arg_pointer
9794 && crtl->stack_alignment_estimated == 128)
9795 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9796 else
9797 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9799 /* Incoming stack alignment can be changed on individual functions
9800 via force_align_arg_pointer attribute. We use the smallest
9801 incoming stack boundary. */
9802 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9803 && lookup_attribute (ix86_force_align_arg_pointer_string,
9804 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9805 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9807 /* The incoming stack frame has to be aligned at least at
9808 parm_stack_boundary. */
9809 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9810 incoming_stack_boundary = crtl->parm_stack_boundary;
9812 /* The stack at the entry of main is aligned by the runtime. We use the
9813 smallest incoming stack boundary. */
9814 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9815 && DECL_NAME (current_function_decl)
9816 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9817 && DECL_FILE_SCOPE_P (current_function_decl))
9818 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9820 return incoming_stack_boundary;
9823 /* Update incoming stack boundary and estimated stack alignment. */
9825 static void
9826 ix86_update_stack_boundary (void)
9828 ix86_incoming_stack_boundary
9829 = ix86_minimum_incoming_stack_boundary (false);
9831 /* x86_64 vararg needs 16byte stack alignment for register save
9832 area. */
9833 if (TARGET_64BIT
9834 && cfun->stdarg
9835 && crtl->stack_alignment_estimated < 128)
9836 crtl->stack_alignment_estimated = 128;
9839 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9840 needed or an rtx for DRAP otherwise. */
9842 static rtx
9843 ix86_get_drap_rtx (void)
9845 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9846 crtl->need_drap = true;
9848 if (stack_realign_drap)
9850 /* Assign DRAP to vDRAP and return vDRAP. */
9851 unsigned int regno = find_drap_reg ();
9852 rtx drap_vreg;
9853 rtx arg_ptr;
9854 rtx seq, insn;
9856 arg_ptr = gen_rtx_REG (Pmode, regno);
9857 crtl->drap_reg = arg_ptr;
9859 start_sequence ();
9860 drap_vreg = copy_to_reg (arg_ptr);
9861 seq = get_insns ();
9862 end_sequence ();
9864 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9865 if (!optimize)
9867 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9868 RTX_FRAME_RELATED_P (insn) = 1;
9870 return drap_vreg;
9872 else
9873 return NULL;
9876 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9878 static rtx
9879 ix86_internal_arg_pointer (void)
9881 return virtual_incoming_args_rtx;
9884 struct scratch_reg {
9885 rtx reg;
9886 bool saved;
9889 /* Return a short-lived scratch register for use on function entry.
9890 In 32-bit mode, it is valid only after the registers are saved
9891 in the prologue. This register must be released by means of
9892 release_scratch_register_on_entry once it is dead. */
9894 static void
9895 get_scratch_register_on_entry (struct scratch_reg *sr)
9897 int regno;
9899 sr->saved = false;
9901 if (TARGET_64BIT)
9903 /* We always use R11 in 64-bit mode. */
9904 regno = R11_REG;
9906 else
9908 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9909 bool fastcall_p
9910 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9911 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9912 int regparm = ix86_function_regparm (fntype, decl);
9913 int drap_regno
9914 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9916 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9917 for the static chain register. */
9918 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9919 && drap_regno != AX_REG)
9920 regno = AX_REG;
9921 else if (regparm < 2 && drap_regno != DX_REG)
9922 regno = DX_REG;
9923 /* ecx is the static chain register. */
9924 else if (regparm < 3 && !fastcall_p && !static_chain_p
9925 && drap_regno != CX_REG)
9926 regno = CX_REG;
9927 else if (ix86_save_reg (BX_REG, true))
9928 regno = BX_REG;
9929 /* esi is the static chain register. */
9930 else if (!(regparm == 3 && static_chain_p)
9931 && ix86_save_reg (SI_REG, true))
9932 regno = SI_REG;
9933 else if (ix86_save_reg (DI_REG, true))
9934 regno = DI_REG;
9935 else
9937 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9938 sr->saved = true;
9942 sr->reg = gen_rtx_REG (Pmode, regno);
9943 if (sr->saved)
9945 rtx insn = emit_insn (gen_push (sr->reg));
9946 RTX_FRAME_RELATED_P (insn) = 1;
9950 /* Release a scratch register obtained from the preceding function. */
9952 static void
9953 release_scratch_register_on_entry (struct scratch_reg *sr)
9955 if (sr->saved)
9957 rtx x, insn = emit_insn (gen_pop (sr->reg));
9959 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9960 RTX_FRAME_RELATED_P (insn) = 1;
9961 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9962 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9963 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9967 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9969 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9971 static void
9972 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9974 /* We skip the probe for the first interval + a small dope of 4 words and
9975 probe that many bytes past the specified size to maintain a protection
9976 area at the bottom of the stack. */
9977 const int dope = 4 * UNITS_PER_WORD;
9978 rtx size_rtx = GEN_INT (size);
9980 /* See if we have a constant small number of probes to generate. If so,
9981 that's the easy case. The run-time loop is made up of 11 insns in the
9982 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9983 for n # of intervals. */
9984 if (size <= 5 * PROBE_INTERVAL)
9986 HOST_WIDE_INT i, adjust;
9987 bool first_probe = true;
9989 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9990 values of N from 1 until it exceeds SIZE. If only one probe is
9991 needed, this will not generate any code. Then adjust and probe
9992 to PROBE_INTERVAL + SIZE. */
9993 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9995 if (first_probe)
9997 adjust = 2 * PROBE_INTERVAL + dope;
9998 first_probe = false;
10000 else
10001 adjust = PROBE_INTERVAL;
10003 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10004 plus_constant (stack_pointer_rtx, -adjust)));
10005 emit_stack_probe (stack_pointer_rtx);
10008 if (first_probe)
10009 adjust = size + PROBE_INTERVAL + dope;
10010 else
10011 adjust = size + PROBE_INTERVAL - i;
10013 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10014 plus_constant (stack_pointer_rtx, -adjust)));
10015 emit_stack_probe (stack_pointer_rtx);
10017 /* Adjust back to account for the additional first interval. */
10018 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10019 plus_constant (stack_pointer_rtx,
10020 PROBE_INTERVAL + dope)));
10023 /* Otherwise, do the same as above, but in a loop. Note that we must be
10024 extra careful with variables wrapping around because we might be at
10025 the very top (or the very bottom) of the address space and we have
10026 to be able to handle this case properly; in particular, we use an
10027 equality test for the loop condition. */
10028 else
10030 HOST_WIDE_INT rounded_size;
10031 struct scratch_reg sr;
10033 get_scratch_register_on_entry (&sr);
10036 /* Step 1: round SIZE to the previous multiple of the interval. */
10038 rounded_size = size & -PROBE_INTERVAL;
10041 /* Step 2: compute initial and final value of the loop counter. */
10043 /* SP = SP_0 + PROBE_INTERVAL. */
10044 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10045 plus_constant (stack_pointer_rtx,
10046 - (PROBE_INTERVAL + dope))));
10048 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10049 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10050 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10051 gen_rtx_PLUS (Pmode, sr.reg,
10052 stack_pointer_rtx)));
10055 /* Step 3: the loop
10057 while (SP != LAST_ADDR)
10059 SP = SP + PROBE_INTERVAL
10060 probe at SP
10063 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10064 values of N from 1 until it is equal to ROUNDED_SIZE. */
10066 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10069 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10070 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10072 if (size != rounded_size)
10074 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10075 plus_constant (stack_pointer_rtx,
10076 rounded_size - size)));
10077 emit_stack_probe (stack_pointer_rtx);
10080 /* Adjust back to account for the additional first interval. */
10081 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10082 plus_constant (stack_pointer_rtx,
10083 PROBE_INTERVAL + dope)));
10085 release_scratch_register_on_entry (&sr);
10088 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10089 cfun->machine->fs.sp_offset += size;
10091 /* Make sure nothing is scheduled before we are done. */
10092 emit_insn (gen_blockage ());
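/* Illustrative sketch, not part of GCC: the net adjustment produced by the
   unrolled path above, restated as plain arithmetic.  The first adjustment
   covers two intervals plus the 4-word dope, each later one covers a single
   interval, and the tail adjustment tops it up, so before the final
   "give back" of PROBE_INTERVAL + dope the total is always
   SIZE + PROBE_INTERVAL + dope (a 4 KiB interval when
   STACK_CHECK_PROBE_INTERVAL_EXP is 12).  */

static HOST_WIDE_INT
unrolled_probe_total_sketch (HOST_WIDE_INT size)
{
  const HOST_WIDE_INT dope = 4 * UNITS_PER_WORD;
  HOST_WIDE_INT i, total = 0;
  bool first_probe = true;

  for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
    {
      total += first_probe ? 2 * PROBE_INTERVAL + dope : PROBE_INTERVAL;
      first_probe = false;
    }
  total += first_probe ? size + PROBE_INTERVAL + dope
                       : size + PROBE_INTERVAL - i;
  return total;  /* == size + PROBE_INTERVAL + dope */
}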
10095 /* Adjust the stack pointer up to REG while probing it. */
10097 const char *
10098 output_adjust_stack_and_probe (rtx reg)
10100 static int labelno = 0;
10101 char loop_lab[32], end_lab[32];
10102 rtx xops[2];
10104 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10105 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10107 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10109 /* Jump to END_LAB if SP == LAST_ADDR. */
10110 xops[0] = stack_pointer_rtx;
10111 xops[1] = reg;
10112 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10113 fputs ("\tje\t", asm_out_file);
10114 assemble_name_raw (asm_out_file, end_lab);
10115 fputc ('\n', asm_out_file);
10117 /* SP = SP + PROBE_INTERVAL. */
10118 xops[1] = GEN_INT (PROBE_INTERVAL);
10119 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10121 /* Probe at SP. */
10122 xops[1] = const0_rtx;
10123 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10125 fprintf (asm_out_file, "\tjmp\t");
10126 assemble_name_raw (asm_out_file, loop_lab);
10127 fputc ('\n', asm_out_file);
10129 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10131 return "";
10134 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10135 inclusive. These are offsets from the current stack pointer. */
10137 static void
10138 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10140 /* See if we have a constant small number of probes to generate. If so,
10141 that's the easy case. The run-time loop is made up of 7 insns in the
10142 generic case while the compile-time loop is made up of n insns for n #
10143 of intervals. */
10144 if (size <= 7 * PROBE_INTERVAL)
10146 HOST_WIDE_INT i;
10148 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10149 it exceeds SIZE. If only one probe is needed, this will not
10150 generate any code. Then probe at FIRST + SIZE. */
10151 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10152 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
10154 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
10157 /* Otherwise, do the same as above, but in a loop. Note that we must be
10158 extra careful with variables wrapping around because we might be at
10159 the very top (or the very bottom) of the address space and we have
10160 to be able to handle this case properly; in particular, we use an
10161 equality test for the loop condition. */
10162 else
10164 HOST_WIDE_INT rounded_size, last;
10165 struct scratch_reg sr;
10167 get_scratch_register_on_entry (&sr);
10170 /* Step 1: round SIZE to the previous multiple of the interval. */
10172 rounded_size = size & -PROBE_INTERVAL;
10175 /* Step 2: compute initial and final value of the loop counter. */
10177 /* TEST_OFFSET = FIRST. */
10178 emit_move_insn (sr.reg, GEN_INT (-first));
10180 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10181 last = first + rounded_size;
10184 /* Step 3: the loop
10186 while (TEST_ADDR != LAST_ADDR)
10188 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10189 probe at TEST_ADDR
10192 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10193 until it is equal to ROUNDED_SIZE. */
10195 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10198 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10199 that SIZE is equal to ROUNDED_SIZE. */
10201 if (size != rounded_size)
10202 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
10203 stack_pointer_rtx,
10204 sr.reg),
10205 rounded_size - size));
10207 release_scratch_register_on_entry (&sr);
10210 /* Make sure nothing is scheduled before we are done. */
10211 emit_insn (gen_blockage ());
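/* Illustrative sketch, not part of GCC: how the loop path above splits a
   probe range.  SIZE is rounded down to a multiple of PROBE_INTERVAL with
   the mask idiom below; the loop probes each interval up to
   FIRST + ROUNDED_SIZE and a single extra probe covers any remainder at
   FIRST + SIZE.  E.g. with a 4096-byte interval, SIZE = 10000 gives
   ROUNDED_SIZE = 8192 plus one trailing probe 1808 bytes further down.  */

static HOST_WIDE_INT
rounded_probe_size_sketch (HOST_WIDE_INT size)
{
  return size & -PROBE_INTERVAL;  /* PROBE_INTERVAL is a power of two.  */
}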
10214 /* Probe a range of stack addresses from REG to END, inclusive. These are
10215 offsets from the current stack pointer. */
10217 const char *
10218 output_probe_stack_range (rtx reg, rtx end)
10220 static int labelno = 0;
10221 char loop_lab[32], end_lab[32];
10222 rtx xops[3];
10224 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10225 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10227 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10229 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10230 xops[0] = reg;
10231 xops[1] = end;
10232 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10233 fputs ("\tje\t", asm_out_file);
10234 assemble_name_raw (asm_out_file, end_lab);
10235 fputc ('\n', asm_out_file);
10237 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10238 xops[1] = GEN_INT (PROBE_INTERVAL);
10239 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10241 /* Probe at TEST_ADDR. */
10242 xops[0] = stack_pointer_rtx;
10243 xops[1] = reg;
10244 xops[2] = const0_rtx;
10245 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10247 fprintf (asm_out_file, "\tjmp\t");
10248 assemble_name_raw (asm_out_file, loop_lab);
10249 fputc ('\n', asm_out_file);
10251 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10253 return "";
10256 /* Finalize the stack_realign_needed flag, which will guide the
10257 prologue/epilogue so it is generated in the correct form. */
10258 static void
10259 ix86_finalize_stack_realign_flags (void)
10261 /* Check if stack realign is really needed after reload, and
10262 store the result in cfun. */
10263 unsigned int incoming_stack_boundary
10264 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10265 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10266 unsigned int stack_realign = (incoming_stack_boundary
10267 < (current_function_is_leaf
10268 ? crtl->max_used_stack_slot_alignment
10269 : crtl->stack_alignment_needed));
10271 if (crtl->stack_realign_finalized)
10273 /* After stack_realign_needed is finalized, we can no longer
10274 change it. */
10275 gcc_assert (crtl->stack_realign_needed == stack_realign);
10277 else
10279 crtl->stack_realign_needed = stack_realign;
10280 crtl->stack_realign_finalized = true;
10284 /* Expand the prologue into a bunch of separate insns. */
10286 void
10287 ix86_expand_prologue (void)
10289 struct machine_function *m = cfun->machine;
10290 rtx insn, t;
10291 bool pic_reg_used;
10292 struct ix86_frame frame;
10293 HOST_WIDE_INT allocate;
10294 bool int_registers_saved;
10296 ix86_finalize_stack_realign_flags ();
10298 /* DRAP should not coexist with stack_realign_fp */
10299 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10301 memset (&m->fs, 0, sizeof (m->fs));
10303 /* Initialize CFA state for before the prologue. */
10304 m->fs.cfa_reg = stack_pointer_rtx;
10305 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10307 /* Track SP offset to the CFA. We continue tracking this after we've
10308 swapped the CFA register away from SP. In the case of re-alignment
10309 this is fudged; we're interested in offsets within the local frame. */
10310 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10311 m->fs.sp_valid = true;
10313 ix86_compute_frame_layout (&frame);
10315 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10317 /* We should have already generated an error for any use of
10318 ms_hook on a nested function. */
10319 gcc_checking_assert (!ix86_static_chain_on_stack);
10321 /* Check if profiling is active and we shall use the profiling-before-
10322 prologue variant. If so, issue a sorry. */
10323 if (crtl->profile && flag_fentry != 0)
10324 sorry ("ms_hook_prologue attribute isn%'t compatible "
10325 "with -mfentry for 32-bit");
10327 /* In ix86_asm_output_function_label we emitted:
10328 8b ff movl.s %edi,%edi
10329 55 push %ebp
10330 8b ec movl.s %esp,%ebp
10332 This matches the hookable function prologue in Win32 API
10333 functions in Microsoft Windows XP Service Pack 2 and newer.
10334 Wine uses this to enable Windows apps to hook the Win32 API
10335 functions provided by Wine.
10337 What that means is that we've already set up the frame pointer. */
10339 if (frame_pointer_needed
10340 && !(crtl->drap_reg && crtl->stack_realign_needed))
10342 rtx push, mov;
10344 /* We've decided to use the frame pointer already set up.
10345 Describe this to the unwinder by pretending that both
10346 push and mov insns happen right here.
10348 Putting the unwind info here at the end of the ms_hook
10349 is done so that we can make absolutely certain we get
10350 the required byte sequence at the start of the function,
10351 rather than relying on an assembler that can produce
10352 the exact encoding required.
10354 However it does mean (in the unpatched case) that we have
10355 a 1 insn window where the asynchronous unwind info is
10356 incorrect. However, if we placed the unwind info at
10357 its correct location we would have incorrect unwind info
10358 in the patched case. Which is probably all moot since
10359 I don't expect Wine generates dwarf2 unwind info for the
10360 system libraries that use this feature. */
10362 insn = emit_insn (gen_blockage ());
10364 push = gen_push (hard_frame_pointer_rtx);
10365 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10366 stack_pointer_rtx);
10367 RTX_FRAME_RELATED_P (push) = 1;
10368 RTX_FRAME_RELATED_P (mov) = 1;
10370 RTX_FRAME_RELATED_P (insn) = 1;
10371 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10372 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10374 /* Note that gen_push incremented m->fs.cfa_offset, even
10375 though we didn't emit the push insn here. */
10376 m->fs.cfa_reg = hard_frame_pointer_rtx;
10377 m->fs.fp_offset = m->fs.cfa_offset;
10378 m->fs.fp_valid = true;
10380 else
10382 /* The frame pointer is not needed so pop %ebp again.
10383 This leaves us with a pristine state. */
10384 emit_insn (gen_pop (hard_frame_pointer_rtx));
10388 /* The first insn of a function that accepts its static chain on the
10389 stack is to push the register that would be filled in by a direct
10390 call. This insn will be skipped by the trampoline. */
10391 else if (ix86_static_chain_on_stack)
10393 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10394 emit_insn (gen_blockage ());
10396 /* We don't want to interpret this push insn as a register save,
10397 only as a stack adjustment. The real copy of the register as
10398 a save will be done later, if needed. */
10399 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10400 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10401 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10402 RTX_FRAME_RELATED_P (insn) = 1;
10405 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10406 DRAP is needed and stack realignment is really needed after reload. */
10407 if (stack_realign_drap)
10409 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10411 /* Only need to push parameter pointer reg if it is caller saved. */
10412 if (!call_used_regs[REGNO (crtl->drap_reg)])
10414 /* Push arg pointer reg */
10415 insn = emit_insn (gen_push (crtl->drap_reg));
10416 RTX_FRAME_RELATED_P (insn) = 1;
10419 /* Grab the argument pointer. */
10420 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10421 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10422 RTX_FRAME_RELATED_P (insn) = 1;
10423 m->fs.cfa_reg = crtl->drap_reg;
10424 m->fs.cfa_offset = 0;
10426 /* Align the stack. */
10427 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10428 stack_pointer_rtx,
10429 GEN_INT (-align_bytes)));
10430 RTX_FRAME_RELATED_P (insn) = 1;
10432 /* Replicate the return address on the stack so that return
10433 address can be reached via (argp - 1) slot. This is needed
10434 to implement macro RETURN_ADDR_RTX and intrinsic function
10435 expand_builtin_return_addr etc. */
10436 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10437 t = gen_frame_mem (Pmode, t);
10438 insn = emit_insn (gen_push (t));
10439 RTX_FRAME_RELATED_P (insn) = 1;
10441 /* For the purposes of frame and register save area addressing,
10442 we've started over with a new frame. */
10443 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10444 m->fs.realigned = true;
10447 if (frame_pointer_needed && !m->fs.fp_valid)
10449 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10450 slower on all targets. Also sdb doesn't like it. */
10451 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10452 RTX_FRAME_RELATED_P (insn) = 1;
10454 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10456 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10457 RTX_FRAME_RELATED_P (insn) = 1;
10459 if (m->fs.cfa_reg == stack_pointer_rtx)
10460 m->fs.cfa_reg = hard_frame_pointer_rtx;
10461 m->fs.fp_offset = m->fs.sp_offset;
10462 m->fs.fp_valid = true;
10466 int_registers_saved = (frame.nregs == 0);
10468 if (!int_registers_saved)
10470 /* If saving registers via PUSH, do so now. */
10471 if (!frame.save_regs_using_mov)
10473 ix86_emit_save_regs ();
10474 int_registers_saved = true;
10475 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10478 /* When using the red zone we may start saving registers before allocating
10479 the stack frame, saving one cycle of the prologue. However, avoid
10480 doing this if we have to probe the stack; at least on x86_64 the
10481 stack probe can turn into a call that clobbers a red zone location. */
10482 else if (ix86_using_red_zone ()
10483 && (! TARGET_STACK_PROBE
10484 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10486 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10487 int_registers_saved = true;
10491 if (stack_realign_fp)
10493 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10494 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10496 /* The computation of the size of the re-aligned stack frame means
10497 that we must allocate the size of the register save area before
10498 performing the actual alignment. Otherwise we cannot guarantee
10499 that there's enough storage above the realignment point. */
10500 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10501 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10502 GEN_INT (m->fs.sp_offset
10503 - frame.sse_reg_save_offset),
10504 -1, false);
10506 /* Align the stack. */
10507 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10508 stack_pointer_rtx,
10509 GEN_INT (-align_bytes)));
10511 /* For the purposes of register save area addressing, the stack
10512 pointer is no longer valid. As for the value of sp_offset,
10513 see ix86_compute_frame_layout, which we need to match in order
10514 to pass verification of stack_pointer_offset at the end. */
10515 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10516 m->fs.sp_valid = false;
10519 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10521 if (flag_stack_usage)
10523 /* We start to count from ARG_POINTER. */
10524 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10526 /* If it was realigned, take into account the fake frame. */
10527 if (stack_realign_drap)
10529 if (ix86_static_chain_on_stack)
10530 stack_size += UNITS_PER_WORD;
10532 if (!call_used_regs[REGNO (crtl->drap_reg)])
10533 stack_size += UNITS_PER_WORD;
10535 /* This over-estimates by 1 minimal-stack-alignment-unit but
10536 mitigates that by counting in the new return address slot. */
10537 current_function_dynamic_stack_size
10538 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10541 current_function_static_stack_size = stack_size;
10544 /* The stack has already been decremented by the instruction calling us
10545 so we need to probe unconditionally to preserve the protection area. */
10546 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10548 /* We expect the registers to be saved when probes are used. */
10549 gcc_assert (int_registers_saved);
10551 if (STACK_CHECK_MOVING_SP)
10553 ix86_adjust_stack_and_probe (allocate);
10554 allocate = 0;
10556 else
10558 HOST_WIDE_INT size = allocate;
10560 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10561 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10563 if (TARGET_STACK_PROBE)
10564 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10565 else
10566 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10570 if (allocate == 0)
10572 else if (!ix86_target_stack_probe ()
10573 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10575 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10576 GEN_INT (-allocate), -1,
10577 m->fs.cfa_reg == stack_pointer_rtx);
10579 else
10581 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10582 rtx r10 = NULL;
10583 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10585 bool eax_live = false;
10586 bool r10_live = false;
10588 if (TARGET_64BIT)
10589 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10590 if (!TARGET_64BIT_MS_ABI)
10591 eax_live = ix86_eax_live_at_start_p ();
10593 if (eax_live)
10595 emit_insn (gen_push (eax));
10596 allocate -= UNITS_PER_WORD;
10598 if (r10_live)
10600 r10 = gen_rtx_REG (Pmode, R10_REG);
10601 emit_insn (gen_push (r10));
10602 allocate -= UNITS_PER_WORD;
10605 emit_move_insn (eax, GEN_INT (allocate));
10606 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10608 /* Use the fact that AX still contains ALLOCATE. */
10609 adjust_stack_insn = (TARGET_64BIT
10610 ? gen_pro_epilogue_adjust_stack_di_sub
10611 : gen_pro_epilogue_adjust_stack_si_sub);
10613 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10614 stack_pointer_rtx, eax));
10616 /* Note that SEH directives need to continue tracking the stack
10617 pointer even after the frame pointer has been set up. */
10618 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10620 if (m->fs.cfa_reg == stack_pointer_rtx)
10621 m->fs.cfa_offset += allocate;
10623 RTX_FRAME_RELATED_P (insn) = 1;
10624 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10625 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10626 plus_constant (stack_pointer_rtx,
10627 -allocate)));
10629 m->fs.sp_offset += allocate;
10631 if (r10_live && eax_live)
10633 t = choose_baseaddr (m->fs.sp_offset - allocate);
10634 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10635 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10636 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10638 else if (eax_live || r10_live)
10640 t = choose_baseaddr (m->fs.sp_offset - allocate);
10641 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10644 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10646 /* If we haven't already set up the frame pointer, do so now. */
10647 if (frame_pointer_needed && !m->fs.fp_valid)
10649 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10650 GEN_INT (frame.stack_pointer_offset
10651 - frame.hard_frame_pointer_offset));
10652 insn = emit_insn (insn);
10653 RTX_FRAME_RELATED_P (insn) = 1;
10654 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10656 if (m->fs.cfa_reg == stack_pointer_rtx)
10657 m->fs.cfa_reg = hard_frame_pointer_rtx;
10658 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10659 m->fs.fp_valid = true;
10662 if (!int_registers_saved)
10663 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10664 if (frame.nsseregs)
10665 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10667 pic_reg_used = false;
10668 if (pic_offset_table_rtx
10669 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10670 || crtl->profile))
10672 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10674 if (alt_pic_reg_used != INVALID_REGNUM)
10675 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10677 pic_reg_used = true;
10680 if (pic_reg_used)
10682 if (TARGET_64BIT)
10684 if (ix86_cmodel == CM_LARGE_PIC)
10686 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10687 rtx label = gen_label_rtx ();
10688 emit_label (label);
10689 LABEL_PRESERVE_P (label) = 1;
10690 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10691 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10692 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10693 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10694 pic_offset_table_rtx, tmp_reg));
10696 else
10697 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10699 else
10700 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10703 /* In the pic_reg_used case, make sure that the got load isn't deleted
10704 when mcount needs it. Blockage to avoid call movement across mcount
10705 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10706 note. */
10707 if (crtl->profile && !flag_fentry && pic_reg_used)
10708 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10710 if (crtl->drap_reg && !crtl->stack_realign_needed)
10712 /* vDRAP is set up, but after reload it turns out stack realignment
10713 isn't necessary; here we emit the prologue to set up DRAP
10714 without the stack realignment adjustment. */
10715 t = choose_baseaddr (0);
10716 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10719 /* Prevent instructions from being scheduled into register save push
10720 sequence when access to the redzone area is done through frame pointer.
10721 The offset between the frame pointer and the stack pointer is calculated
10722 relative to the value of the stack pointer at the end of the function
10723 prologue, and moving instructions that access redzone area via frame
10724 pointer inside push sequence violates this assumption. */
10725 if (frame_pointer_needed && frame.red_zone_size)
10726 emit_insn (gen_memory_blockage ());
10728 /* Emit cld instruction if stringops are used in the function. */
10729 if (TARGET_CLD && ix86_current_function_needs_cld)
10730 emit_insn (gen_cld ());
10732 /* SEH requires that the prologue end within 256 bytes of the start of
10733 the function. Prevent instruction schedules that would extend that. */
10734 if (TARGET_SEH)
10735 emit_insn (gen_blockage ());
10738 /* Emit code to restore REG using a POP insn. */
10740 static void
10741 ix86_emit_restore_reg_using_pop (rtx reg)
10743 struct machine_function *m = cfun->machine;
10744 rtx insn = emit_insn (gen_pop (reg));
10746 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10747 m->fs.sp_offset -= UNITS_PER_WORD;
10749 if (m->fs.cfa_reg == crtl->drap_reg
10750 && REGNO (reg) == REGNO (crtl->drap_reg))
10752 /* Previously we'd represented the CFA as an expression
10753 like *(%ebp - 8). We've just popped that value from
10754 the stack, which means we need to reset the CFA to
10755 the drap register. This will remain until we restore
10756 the stack pointer. */
10757 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10758 RTX_FRAME_RELATED_P (insn) = 1;
10760 /* This means that the DRAP register is valid for addressing too. */
10761 m->fs.drap_valid = true;
10762 return;
10765 if (m->fs.cfa_reg == stack_pointer_rtx)
10767 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10768 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10769 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10770 RTX_FRAME_RELATED_P (insn) = 1;
10772 m->fs.cfa_offset -= UNITS_PER_WORD;
10775 /* When the frame pointer is the CFA, and we pop it, we are
10776 swapping back to the stack pointer as the CFA. This happens
10777 for stack frames that don't allocate other data, so we assume
10778 the stack pointer is now pointing at the return address, i.e.
10779 the function entry state, which makes the offset one word. */
10780 if (reg == hard_frame_pointer_rtx)
10782 m->fs.fp_valid = false;
10783 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10785 m->fs.cfa_reg = stack_pointer_rtx;
10786 m->fs.cfa_offset -= UNITS_PER_WORD;
10788 add_reg_note (insn, REG_CFA_DEF_CFA,
10789 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10790 GEN_INT (m->fs.cfa_offset)));
10791 RTX_FRAME_RELATED_P (insn) = 1;
10796 /* Emit code to restore saved registers using POP insns. */
10798 static void
10799 ix86_emit_restore_regs_using_pop (void)
10801 unsigned int regno;
10803 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10804 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10805 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10808 /* Emit code and notes for the LEAVE instruction. */
10810 static void
10811 ix86_emit_leave (void)
10813 struct machine_function *m = cfun->machine;
10814 rtx insn = emit_insn (ix86_gen_leave ());
10816 ix86_add_queued_cfa_restore_notes (insn);
10818 gcc_assert (m->fs.fp_valid);
10819 m->fs.sp_valid = true;
10820 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10821 m->fs.fp_valid = false;
10823 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10825 m->fs.cfa_reg = stack_pointer_rtx;
10826 m->fs.cfa_offset = m->fs.sp_offset;
10828 add_reg_note (insn, REG_CFA_DEF_CFA,
10829 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10830 RTX_FRAME_RELATED_P (insn) = 1;
10831 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10832 m->fs.fp_offset);
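/* Illustrative note, not part of GCC: the bookkeeping above mirrors what
   the leave instruction itself does, namely
   mov %ebp, %esp
   pop %ebp
   so afterwards the stack pointer sits one word above where the frame
   pointer pointed, which is exactly fp_offset - UNITS_PER_WORD.  */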
10836 /* Emit code to restore saved registers using MOV insns.
10837 First register is restored from CFA - CFA_OFFSET. */
10838 static void
10839 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10840 int maybe_eh_return)
10842 struct machine_function *m = cfun->machine;
10843 unsigned int regno;
10845 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10846 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10848 rtx reg = gen_rtx_REG (Pmode, regno);
10849 rtx insn, mem;
10851 mem = choose_baseaddr (cfa_offset);
10852 mem = gen_frame_mem (Pmode, mem);
10853 insn = emit_move_insn (reg, mem);
10855 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10857 /* Previously we'd represented the CFA as an expression
10858 like *(%ebp - 8). We've just popped that value from
10859 the stack, which means we need to reset the CFA to
10860 the drap register. This will remain until we restore
10861 the stack pointer. */
10862 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10863 RTX_FRAME_RELATED_P (insn) = 1;
10865 /* This means that the DRAP register is valid for addressing. */
10866 m->fs.drap_valid = true;
10868 else
10869 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10871 cfa_offset -= UNITS_PER_WORD;
10875 /* Emit code to restore saved SSE registers using MOV insns.
10876 First register is restored from CFA - CFA_OFFSET. */
10877 static void
10878 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10879 int maybe_eh_return)
10881 unsigned int regno;
10883 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10884 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10886 rtx reg = gen_rtx_REG (V4SFmode, regno);
10887 rtx mem;
10889 mem = choose_baseaddr (cfa_offset);
10890 mem = gen_rtx_MEM (V4SFmode, mem);
10891 set_mem_align (mem, 128);
10892 emit_move_insn (reg, mem);
10894 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10896 cfa_offset -= 16;
10900 /* Restore function stack, frame, and registers. */
10902 void
10903 ix86_expand_epilogue (int style)
10905 struct machine_function *m = cfun->machine;
10906 struct machine_frame_state frame_state_save = m->fs;
10907 struct ix86_frame frame;
10908 bool restore_regs_via_mov;
10909 bool using_drap;
10911 ix86_finalize_stack_realign_flags ();
10912 ix86_compute_frame_layout (&frame);
10914 m->fs.sp_valid = (!frame_pointer_needed
10915 || (current_function_sp_is_unchanging
10916 && !stack_realign_fp));
10917 gcc_assert (!m->fs.sp_valid
10918 || m->fs.sp_offset == frame.stack_pointer_offset);
10920 /* The FP must be valid if the frame pointer is present. */
10921 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10922 gcc_assert (!m->fs.fp_valid
10923 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10925 /* We must have *some* valid pointer to the stack frame. */
10926 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10928 /* The DRAP is never valid at this point. */
10929 gcc_assert (!m->fs.drap_valid);
10931 /* See the comment about red zone and frame
10932 pointer usage in ix86_expand_prologue. */
10933 if (frame_pointer_needed && frame.red_zone_size)
10934 emit_insn (gen_memory_blockage ());
10936 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10937 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10939 /* Determine the CFA offset of the end of the red-zone. */
10940 m->fs.red_zone_offset = 0;
10941 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10943 /* The red-zone begins below the return address. */
10944 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10946 /* When the register save area is in the aligned portion of
10947 the stack, determine the maximum runtime displacement that
10948 matches up with the aligned frame. */
10949 if (stack_realign_drap)
10950 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10951 + UNITS_PER_WORD);
10954 /* Special care must be taken for the normal return case of a function
10955 using eh_return: the eax and edx registers are marked as saved, but
10956 not restored along this path. Adjust the save location to match. */
10957 if (crtl->calls_eh_return && style != 2)
10958 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10960 /* EH_RETURN requires the use of moves to function properly. */
10961 if (crtl->calls_eh_return)
10962 restore_regs_via_mov = true;
10963 /* SEH requires the use of pops to identify the epilogue. */
10964 else if (TARGET_SEH)
10965 restore_regs_via_mov = false;
10966 /* If we're only restoring one register and sp is not valid, then
10967 use a move instruction to restore the register, since it's
10968 less work than reloading sp and popping the register. */
10969 else if (!m->fs.sp_valid && frame.nregs <= 1)
10970 restore_regs_via_mov = true;
10971 else if (TARGET_EPILOGUE_USING_MOVE
10972 && cfun->machine->use_fast_prologue_epilogue
10973 && (frame.nregs > 1
10974 || m->fs.sp_offset != frame.reg_save_offset))
10975 restore_regs_via_mov = true;
10976 else if (frame_pointer_needed
10977 && !frame.nregs
10978 && m->fs.sp_offset != frame.reg_save_offset)
10979 restore_regs_via_mov = true;
10980 else if (frame_pointer_needed
10981 && TARGET_USE_LEAVE
10982 && cfun->machine->use_fast_prologue_epilogue
10983 && frame.nregs == 1)
10984 restore_regs_via_mov = true;
10985 else
10986 restore_regs_via_mov = false;
10988 if (restore_regs_via_mov || frame.nsseregs)
10990 /* Ensure that the entire register save area is addressable via
10991 the stack pointer, if we will restore via sp. */
10992 if (TARGET_64BIT
10993 && m->fs.sp_offset > 0x7fffffff
10994 && !(m->fs.fp_valid || m->fs.drap_valid)
10995 && (frame.nsseregs + frame.nregs) != 0)
10997 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10998 GEN_INT (m->fs.sp_offset
10999 - frame.sse_reg_save_offset),
11000 style,
11001 m->fs.cfa_reg == stack_pointer_rtx);
11005 /* If there are any SSE registers to restore, then we have to do it
11006 via moves, since there's obviously no pop for SSE regs. */
11007 if (frame.nsseregs)
11008 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11009 style == 2);
11011 if (restore_regs_via_mov)
11013 rtx t;
11015 if (frame.nregs)
11016 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11018 /* eh_return epilogues need %ecx added to the stack pointer. */
11019 if (style == 2)
11021 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11023 /* Stack align doesn't work with eh_return. */
11024 gcc_assert (!stack_realign_drap);
11025 /* Neither do regparm nested functions. */
11026 gcc_assert (!ix86_static_chain_on_stack);
11028 if (frame_pointer_needed)
11030 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11031 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
11032 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11034 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11035 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11037 /* Note that we use SA as a temporary CFA, as the return
11038 address is at the proper place relative to it. We
11039 pretend this happens at the FP restore insn because
11040 prior to this insn the FP would be stored at the wrong
11041 offset relative to SA, and after this insn we have no
11042 other reasonable register to use for the CFA. We don't
11043 bother resetting the CFA to the SP for the duration of
11044 the return insn. */
11045 add_reg_note (insn, REG_CFA_DEF_CFA,
11046 plus_constant (sa, UNITS_PER_WORD));
11047 ix86_add_queued_cfa_restore_notes (insn);
11048 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11049 RTX_FRAME_RELATED_P (insn) = 1;
11051 m->fs.cfa_reg = sa;
11052 m->fs.cfa_offset = UNITS_PER_WORD;
11053 m->fs.fp_valid = false;
11055 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11056 const0_rtx, style, false);
11058 else
11060 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11061 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
11062 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11063 ix86_add_queued_cfa_restore_notes (insn);
11065 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11066 if (m->fs.cfa_offset != UNITS_PER_WORD)
11068 m->fs.cfa_offset = UNITS_PER_WORD;
11069 add_reg_note (insn, REG_CFA_DEF_CFA,
11070 plus_constant (stack_pointer_rtx,
11071 UNITS_PER_WORD));
11072 RTX_FRAME_RELATED_P (insn) = 1;
11075 m->fs.sp_offset = UNITS_PER_WORD;
11076 m->fs.sp_valid = true;
11079 else
11081 /* SEH requires that the function end with (1) a stack adjustment
11082 if necessary, (2) a sequence of pops, and (3) a return or
11083 jump instruction. Prevent insns from the function body from
11084 being scheduled into this sequence. */
11085 if (TARGET_SEH)
11087 /* Prevent a catch region from being adjacent to the standard
11088 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda nor
11089 several other flags that would be interesting to test are
11090 set up yet. */
11091 if (flag_non_call_exceptions)
11092 emit_insn (gen_nops (const1_rtx));
11093 else
11094 emit_insn (gen_blockage ());
11097 /* First step is to deallocate the stack frame so that we can
11098 pop the registers. */
11099 if (!m->fs.sp_valid)
11101 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11102 GEN_INT (m->fs.fp_offset
11103 - frame.reg_save_offset),
11104 style, false);
11106 else if (m->fs.sp_offset != frame.reg_save_offset)
11108 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11109 GEN_INT (m->fs.sp_offset
11110 - frame.reg_save_offset),
11111 style,
11112 m->fs.cfa_reg == stack_pointer_rtx);
11115 ix86_emit_restore_regs_using_pop ();
11118 /* If we used a frame pointer and haven't already got rid of it,
11119 then do so now. */
11120 if (m->fs.fp_valid)
11122 /* If the stack pointer is valid and pointing at the frame
11123 pointer store address, then we only need a pop. */
11124 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11125 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11126 /* Leave results in shorter dependency chains on CPUs that are
11127 able to grok it fast. */
11128 else if (TARGET_USE_LEAVE
11129 || optimize_function_for_size_p (cfun)
11130 || !cfun->machine->use_fast_prologue_epilogue)
11131 ix86_emit_leave ();
11132 else
11134 pro_epilogue_adjust_stack (stack_pointer_rtx,
11135 hard_frame_pointer_rtx,
11136 const0_rtx, style, !using_drap);
11137 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11141 if (using_drap)
11143 int param_ptr_offset = UNITS_PER_WORD;
11144 rtx insn;
11146 gcc_assert (stack_realign_drap);
11148 if (ix86_static_chain_on_stack)
11149 param_ptr_offset += UNITS_PER_WORD;
11150 if (!call_used_regs[REGNO (crtl->drap_reg)])
11151 param_ptr_offset += UNITS_PER_WORD;
11153 insn = emit_insn (gen_rtx_SET
11154 (VOIDmode, stack_pointer_rtx,
11155 gen_rtx_PLUS (Pmode,
11156 crtl->drap_reg,
11157 GEN_INT (-param_ptr_offset))));
11158 m->fs.cfa_reg = stack_pointer_rtx;
11159 m->fs.cfa_offset = param_ptr_offset;
11160 m->fs.sp_offset = param_ptr_offset;
11161 m->fs.realigned = false;
11163 add_reg_note (insn, REG_CFA_DEF_CFA,
11164 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11165 GEN_INT (param_ptr_offset)));
11166 RTX_FRAME_RELATED_P (insn) = 1;
11168 if (!call_used_regs[REGNO (crtl->drap_reg)])
11169 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11172 /* At this point the stack pointer must be valid, and we must have
11173 restored all of the registers. We may not have deallocated the
11174 entire stack frame. We've delayed this until now because it may
11175 be possible to merge the local stack deallocation with the
11176 deallocation forced by ix86_static_chain_on_stack. */
11177 gcc_assert (m->fs.sp_valid);
11178 gcc_assert (!m->fs.fp_valid);
11179 gcc_assert (!m->fs.realigned);
11180 if (m->fs.sp_offset != UNITS_PER_WORD)
11182 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11183 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11184 style, true);
11187 /* Sibcall epilogues don't want a return instruction. */
11188 if (style == 0)
11190 m->fs = frame_state_save;
11191 return;
11194 /* Emit vzeroupper if needed. */
11195 if (TARGET_VZEROUPPER
11196 && !TREE_THIS_VOLATILE (cfun->decl)
11197 && !cfun->machine->caller_return_avx256_p)
11198 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
11200 if (crtl->args.pops_args && crtl->args.size)
11202 rtx popc = GEN_INT (crtl->args.pops_args);
11204 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
11205 address, do an explicit add, and jump indirectly to the caller. */
11207 if (crtl->args.pops_args >= 65536)
11209 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11210 rtx insn;
11212 /* There is no "pascal" calling convention in any 64bit ABI. */
11213 gcc_assert (!TARGET_64BIT);
11215 insn = emit_insn (gen_pop (ecx));
11216 m->fs.cfa_offset -= UNITS_PER_WORD;
11217 m->fs.sp_offset -= UNITS_PER_WORD;
11219 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11220 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11221 add_reg_note (insn, REG_CFA_REGISTER,
11222 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11223 RTX_FRAME_RELATED_P (insn) = 1;
11225 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11226 popc, -1, true);
11227 emit_jump_insn (gen_return_indirect_internal (ecx));
11229 else
11230 emit_jump_insn (gen_return_pop_internal (popc));
11232 else
11233 emit_jump_insn (gen_return_internal ());
11235 /* Restore the state back to the state from the prologue,
11236 so that it's correct for the next epilogue. */
11237 m->fs = frame_state_save;
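
As an illustrative sketch (not part of i386.c): a 32-bit callee that pops more than 64K bytes of arguments is what the pops_args >= 65536 branch above handles, by popping the return address into %ecx, adjusting %esp explicitly, and returning indirectly. The stdcall attribute and the 70000-byte struct below are assumptions chosen only to exceed the 64K limit a "ret $N" can encode.

/* Hypothetical example, 32-bit only: with stdcall the callee pops its
   own stack arguments, and 70000 bytes exceeds the 64K limit.  */
struct big_arg { char bytes[70000]; };

__attribute__ ((stdcall)) void
huge_pop_callee (struct big_arg a)
{
  (void) a.bytes[0];
}
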
11240 /* Reset from the function's potential modifications. */
11242 static void
11243 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11244 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11246 if (pic_offset_table_rtx)
11247 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11248 #if TARGET_MACHO
11249 /* Mach-O doesn't support labels at the end of objects, so if
11250 it looks like we might want one, insert a NOP. */
11252 rtx insn = get_last_insn ();
11253 while (insn
11254 && NOTE_P (insn)
11255 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11256 insn = PREV_INSN (insn);
11257 if (insn
11258 && (LABEL_P (insn)
11259 || (NOTE_P (insn)
11260 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11261 fputs ("\tnop\n", file);
11263 #endif
11267 /* Return a scratch register to use in the split stack prologue. The
11268 split stack prologue is used for -fsplit-stack. It is the first
11269 instructions in the function, even before the regular prologue.
11270 The scratch register can be any caller-saved register which is not
11271 used for parameters or for the static chain. */
11273 static unsigned int
11274 split_stack_prologue_scratch_regno (void)
11276 if (TARGET_64BIT)
11277 return R11_REG;
11278 else
11280 bool is_fastcall;
11281 int regparm;
11283 is_fastcall = (lookup_attribute ("fastcall",
11284 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11285 != NULL);
11286 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11288 if (is_fastcall)
11290 if (DECL_STATIC_CHAIN (cfun->decl))
11292 sorry ("-fsplit-stack does not support fastcall with "
11293 "nested function");
11294 return INVALID_REGNUM;
11296 return AX_REG;
11298 else if (regparm < 3)
11300 if (!DECL_STATIC_CHAIN (cfun->decl))
11301 return CX_REG;
11302 else
11304 if (regparm >= 2)
11306 sorry ("-fsplit-stack does not support 2 register "
11307 "parameters for a nested function");
11308 return INVALID_REGNUM;
11310 return DX_REG;
11313 else
11315 /* FIXME: We could make this work by pushing a register
11316 around the addition and comparison. */
11317 sorry ("-fsplit-stack does not support 3 register parameters");
11318 return INVALID_REGNUM;
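
A minimal sketch (not from the sources) of 32-bit declarations and the scratch register the routine above would pick for each; the trailing comments merely restate its logic under the usual ia32 calling-convention attributes.

/* Hypothetical examples only.  */
void plain_fn (int a);                                  /* regparm 0 -> %ecx        */
__attribute__ ((fastcall)) void fast_fn (int a, int b); /* fastcall  -> %eax        */
__attribute__ ((regparm (1))) void one_fn (int a);      /* regparm 1 -> %ecx, or
                                                           %edx for a nested fn     */
__attribute__ ((regparm (3))) void three_fn (int, int, int); /* -> sorry ()         */
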
11323 /* A SYMBOL_REF for the function which allocates new stack space for
11324 -fsplit-stack. */
11326 static GTY(()) rtx split_stack_fn;
11328 /* A SYMBOL_REF for the more stack function when using the large
11329 model. */
11331 static GTY(()) rtx split_stack_fn_large;
11333 /* Handle -fsplit-stack. These are the first instructions in the
11334 function, even before the regular prologue. */
11336 void
11337 ix86_expand_split_stack_prologue (void)
11339 struct ix86_frame frame;
11340 HOST_WIDE_INT allocate;
11341 unsigned HOST_WIDE_INT args_size;
11342 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11343 rtx scratch_reg = NULL_RTX;
11344 rtx varargs_label = NULL_RTX;
11345 rtx fn;
11347 gcc_assert (flag_split_stack && reload_completed);
11349 ix86_finalize_stack_realign_flags ();
11350 ix86_compute_frame_layout (&frame);
11351 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11353 /* This is the label we will branch to if we have enough stack
11354 space. We expect the basic block reordering pass to reverse this
11355 branch if optimizing, so that we branch in the unlikely case. */
11356 label = gen_label_rtx ();
11358 /* We need to compare the stack pointer minus the frame size with
11359 the stack boundary in the TCB. The stack boundary always gives
11360 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11361 can compare directly. Otherwise we need to do an addition. */
11363 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11364 UNSPEC_STACK_CHECK);
11365 limit = gen_rtx_CONST (Pmode, limit);
11366 limit = gen_rtx_MEM (Pmode, limit);
11367 if (allocate < SPLIT_STACK_AVAILABLE)
11368 current = stack_pointer_rtx;
11369 else
11371 unsigned int scratch_regno;
11372 rtx offset;
11374 /* We need a scratch register to hold the stack pointer minus
11375 the required frame size. Since this is the very start of the
11376 function, the scratch register can be any caller-saved
11377 register which is not used for parameters. */
11378 offset = GEN_INT (- allocate);
11379 scratch_regno = split_stack_prologue_scratch_regno ();
11380 if (scratch_regno == INVALID_REGNUM)
11381 return;
11382 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11383 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11385 /* We don't use ix86_gen_add3 in this case because it will
11386 want to split to lea, but when not optimizing the insn
11387 will not be split after this point. */
11388 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11389 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11390 offset)));
11392 else
11394 emit_move_insn (scratch_reg, offset);
11395 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11396 stack_pointer_rtx));
11398 current = scratch_reg;
11401 ix86_expand_branch (GEU, current, limit, label);
11402 jump_insn = get_last_insn ();
11403 JUMP_LABEL (jump_insn) = label;
11405 /* Mark the jump as very likely to be taken. */
11406 add_reg_note (jump_insn, REG_BR_PROB,
11407 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
11409 if (split_stack_fn == NULL_RTX)
11410 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11411 fn = split_stack_fn;
11413 /* Get more stack space. We pass in the desired stack space and the
11414 size of the arguments to copy to the new stack. In 32-bit mode
11415 we push the parameters; __morestack will return on a new stack
11416 anyhow. In 64-bit mode we pass the parameters in r10 and
11417 r11. */
11418 allocate_rtx = GEN_INT (allocate);
11419 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11420 call_fusage = NULL_RTX;
11421 if (TARGET_64BIT)
11423 rtx reg10, reg11;
11425 reg10 = gen_rtx_REG (Pmode, R10_REG);
11426 reg11 = gen_rtx_REG (Pmode, R11_REG);
11428 /* If this function uses a static chain, it will be in %r10.
11429 Preserve it across the call to __morestack. */
11430 if (DECL_STATIC_CHAIN (cfun->decl))
11432 rtx rax;
11434 rax = gen_rtx_REG (Pmode, AX_REG);
11435 emit_move_insn (rax, reg10);
11436 use_reg (&call_fusage, rax);
11439 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11441 HOST_WIDE_INT argval;
11443 /* When using the large model we need to load the address
11444 into a register, and we've run out of registers. So we
11445 switch to a different calling convention, and we call a
11446 different function: __morestack_large. We pass the
11447 argument size in the upper 32 bits of r10 and pass the
11448 frame size in the lower 32 bits. */
11449 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11450 gcc_assert ((args_size & 0xffffffff) == args_size);
11452 if (split_stack_fn_large == NULL_RTX)
11453 split_stack_fn_large =
11454 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11456 if (ix86_cmodel == CM_LARGE_PIC)
11458 rtx label, x;
11460 label = gen_label_rtx ();
11461 emit_label (label);
11462 LABEL_PRESERVE_P (label) = 1;
11463 emit_insn (gen_set_rip_rex64 (reg10, label));
11464 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11465 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11466 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11467 UNSPEC_GOT);
11468 x = gen_rtx_CONST (Pmode, x);
11469 emit_move_insn (reg11, x);
11470 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11471 x = gen_const_mem (Pmode, x);
11472 emit_move_insn (reg11, x);
11474 else
11475 emit_move_insn (reg11, split_stack_fn_large);
11477 fn = reg11;
11479 argval = ((args_size << 16) << 16) + allocate;
11480 emit_move_insn (reg10, GEN_INT (argval));
11482 else
11484 emit_move_insn (reg10, allocate_rtx);
11485 emit_move_insn (reg11, GEN_INT (args_size));
11486 use_reg (&call_fusage, reg11);
11489 use_reg (&call_fusage, reg10);
11491 else
11493 emit_insn (gen_push (GEN_INT (args_size)));
11494 emit_insn (gen_push (allocate_rtx));
11496 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11497 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11498 NULL_RTX, 0);
11499 add_function_usage_to (call_insn, call_fusage);
11501 /* In order to make call/return prediction work right, we now need
11502 to execute a return instruction. See
11503 libgcc/config/i386/morestack.S for the details on how this works.
11505 For flow purposes gcc must not see this as a return
11506 instruction--we need control flow to continue at the subsequent
11507 label. Therefore, we use an unspec. */
11508 gcc_assert (crtl->args.pops_args < 65536);
11509 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11511 /* If we are in 64-bit mode and this function uses a static chain,
11512 we saved %r10 in %rax before calling __morestack. */
11513 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11514 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11515 gen_rtx_REG (Pmode, AX_REG));
11517 /* If this function calls va_start, we need to store a pointer to
11518 the arguments on the old stack, because they may not have been
11519 all copied to the new stack. At this point the old stack can be
11520 found at the frame pointer value used by __morestack, because
11521 __morestack has set that up before calling back to us. Here we
11522 store that pointer in a scratch register, and in
11523 ix86_expand_prologue we store the scratch register in a stack
11524 slot. */
11525 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11527 unsigned int scratch_regno;
11528 rtx frame_reg;
11529 int words;
11531 scratch_regno = split_stack_prologue_scratch_regno ();
11532 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11533 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11535 /* 64-bit:
11536 fp -> old fp value
11537 return address within this function
11538 return address of caller of this function
11539 stack arguments
11540 So we add three words to get to the stack arguments.
11542 32-bit:
11543 fp -> old fp value
11544 return address within this function
11545 first argument to __morestack
11546 second argument to __morestack
11547 return address of caller of this function
11548 stack arguments
11549 So we add five words to get to the stack arguments.
11551 words = TARGET_64BIT ? 3 : 5;
11552 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11553 gen_rtx_PLUS (Pmode, frame_reg,
11554 GEN_INT (words * UNITS_PER_WORD))));
11556 varargs_label = gen_label_rtx ();
11557 emit_jump_insn (gen_jump (varargs_label));
11558 JUMP_LABEL (get_last_insn ()) = varargs_label;
11560 emit_barrier ();
11563 emit_label (label);
11564 LABEL_NUSES (label) = 1;
11566 /* If this function calls va_start, we now have to set the scratch
11567 register for the case where we do not call __morestack. In this
11568 case we need to set it based on the stack pointer. */
11569 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11571 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11572 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11573 GEN_INT (UNITS_PER_WORD))));
11575 emit_label (varargs_label);
11576 LABEL_NUSES (varargs_label) = 1;
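
A minimal sketch (not from the sources) of code that exercises both comparison paths above when compiled with -fsplit-stack: the first function compares the stack pointer directly against the TCB limit, while the second needs the scratch-register addition because its frame exceeds SPLIT_STACK_AVAILABLE. The 256K figure is only an assumption chosen to be comfortably larger than that constant.

/* Hypothetical example; build with: gcc -fsplit-stack -O2 -c example.c  */
#include <string.h>

void
small_frame (char *out)
{
  char buf[64];                    /* small frame: direct limit compare */
  memset (buf, 0, sizeof buf);
  memcpy (out, buf, sizeof buf);
}

void
large_frame (char *out)
{
  char buf[256 * 1024];            /* large frame: scratch-reg addition */
  memset (buf, 0, sizeof buf);
  memcpy (out, buf, 64);
}
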
11580 /* We may have to tell the dataflow pass that the split stack prologue
11581 is initializing a scratch register. */
11583 static void
11584 ix86_live_on_entry (bitmap regs)
11586 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11588 gcc_assert (flag_split_stack);
11589 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11593 /* Extract the parts of an RTL expression that is a valid memory address
11594 for an instruction. Return 0 if the structure of the address is
11595 grossly off. Return -1 if the address contains ASHIFT, so it is not
11596 strictly valid, but still used for computing the length of the lea instruction. */
11598 int
11599 ix86_decompose_address (rtx addr, struct ix86_address *out)
11601 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11602 rtx base_reg, index_reg;
11603 HOST_WIDE_INT scale = 1;
11604 rtx scale_rtx = NULL_RTX;
11605 rtx tmp;
11606 int retval = 1;
11607 enum ix86_address_seg seg = SEG_DEFAULT;
11609 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
11610 base = addr;
11611 else if (GET_CODE (addr) == PLUS)
11613 rtx addends[4], op;
11614 int n = 0, i;
11616 op = addr;
11619 if (n >= 4)
11620 return 0;
11621 addends[n++] = XEXP (op, 1);
11622 op = XEXP (op, 0);
11624 while (GET_CODE (op) == PLUS);
11625 if (n >= 4)
11626 return 0;
11627 addends[n] = op;
11629 for (i = n; i >= 0; --i)
11631 op = addends[i];
11632 switch (GET_CODE (op))
11634 case MULT:
11635 if (index)
11636 return 0;
11637 index = XEXP (op, 0);
11638 scale_rtx = XEXP (op, 1);
11639 break;
11641 case ASHIFT:
11642 if (index)
11643 return 0;
11644 index = XEXP (op, 0);
11645 tmp = XEXP (op, 1);
11646 if (!CONST_INT_P (tmp))
11647 return 0;
11648 scale = INTVAL (tmp);
11649 if ((unsigned HOST_WIDE_INT) scale > 3)
11650 return 0;
11651 scale = 1 << scale;
11652 break;
11654 case UNSPEC:
11655 if (XINT (op, 1) == UNSPEC_TP
11656 && TARGET_TLS_DIRECT_SEG_REFS
11657 && seg == SEG_DEFAULT)
11658 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11659 else
11660 return 0;
11661 break;
11663 case REG:
11664 case SUBREG:
11665 if (!base)
11666 base = op;
11667 else if (!index)
11668 index = op;
11669 else
11670 return 0;
11671 break;
11673 case CONST:
11674 case CONST_INT:
11675 case SYMBOL_REF:
11676 case LABEL_REF:
11677 if (disp)
11678 return 0;
11679 disp = op;
11680 break;
11682 default:
11683 return 0;
11687 else if (GET_CODE (addr) == MULT)
11689 index = XEXP (addr, 0); /* index*scale */
11690 scale_rtx = XEXP (addr, 1);
11692 else if (GET_CODE (addr) == ASHIFT)
11694 /* We're called for lea too, which implements ashift on occasion. */
11695 index = XEXP (addr, 0);
11696 tmp = XEXP (addr, 1);
11697 if (!CONST_INT_P (tmp))
11698 return 0;
11699 scale = INTVAL (tmp);
11700 if ((unsigned HOST_WIDE_INT) scale > 3)
11701 return 0;
11702 scale = 1 << scale;
11703 retval = -1;
11705 else
11706 disp = addr; /* displacement */
11708 /* Extract the integral value of scale. */
11709 if (scale_rtx)
11711 if (!CONST_INT_P (scale_rtx))
11712 return 0;
11713 scale = INTVAL (scale_rtx);
11716 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11717 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11719 /* Avoid useless 0 displacement. */
11720 if (disp == const0_rtx && (base || index))
11721 disp = NULL_RTX;
11723 /* Allow arg pointer and stack pointer as index if there is no scaling. */
11724 if (base_reg && index_reg && scale == 1
11725 && (index_reg == arg_pointer_rtx
11726 || index_reg == frame_pointer_rtx
11727 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11729 rtx tmp;
11730 tmp = base, base = index, index = tmp;
11731 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11734 /* Special case: %ebp cannot be encoded as a base without a displacement.
11735 Similarly %r13. */
11736 if (!disp
11737 && base_reg
11738 && (base_reg == hard_frame_pointer_rtx
11739 || base_reg == frame_pointer_rtx
11740 || base_reg == arg_pointer_rtx
11741 || (REG_P (base_reg)
11742 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11743 || REGNO (base_reg) == R13_REG))))
11744 disp = const0_rtx;
11746 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11747 Avoid this by transforming to [%esi+0].
11748 Reload calls address legitimization without cfun defined, so we need
11749 to test cfun for being non-NULL. */
11750 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11751 && base_reg && !index_reg && !disp
11752 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11753 disp = const0_rtx;
11755 /* Special case: encode reg+reg instead of reg*2. */
11756 if (!base && index && scale == 2)
11757 base = index, base_reg = index_reg, scale = 1;
11759 /* Special case: scaling cannot be encoded without base or displacement. */
11760 if (!base && !disp && index && scale != 1)
11761 disp = const0_rtx;
11763 out->base = base;
11764 out->index = index;
11765 out->disp = disp;
11766 out->scale = scale;
11767 out->seg = seg;
11769 return retval;
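
As an illustration (not part of the file), here is a source-level access whose effective address decomposes into the fields filled in above: with 4-byte ints the load below uses an operand of the form 12(%base,%index,4), i.e. base = p, index = i, scale = 4, disp = 12, seg = SEG_DEFAULT.

/* Hypothetical example of a base + index*scale + disp address.  */
int
load_elem (int *p, long i)
{
  return p[i + 3];                 /* 12(%reg,%reg,4) on ia32/x86-64 */
}
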
11772 /* Return cost of the memory address x.
11773 For i386, it is better to use a complex address than let gcc copy
11774 the address into a reg and make a new pseudo. But not if the address
11775 requires two regs - that would mean more pseudos with longer
11776 lifetimes. */
11777 static int
11778 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11780 struct ix86_address parts;
11781 int cost = 1;
11782 int ok = ix86_decompose_address (x, &parts);
11784 gcc_assert (ok);
11786 if (parts.base && GET_CODE (parts.base) == SUBREG)
11787 parts.base = SUBREG_REG (parts.base);
11788 if (parts.index && GET_CODE (parts.index) == SUBREG)
11789 parts.index = SUBREG_REG (parts.index);
11791 /* Attempt to minimize number of registers in the address. */
11792 if ((parts.base
11793 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11794 || (parts.index
11795 && (!REG_P (parts.index)
11796 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11797 cost++;
11799 if (parts.base
11800 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11801 && parts.index
11802 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11803 && parts.base != parts.index)
11804 cost++;
11806 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11807 since its predecode logic can't detect the length of instructions
11808 and decoding degenerates to vector decoded. Increase the cost of such
11809 addresses here. The penalty is at least 2 cycles. It may be worthwhile
11810 to split such addresses or even refuse them at all.
11812 The following addressing modes are affected:
11813 [base+scale*index]
11814 [scale*index+disp]
11815 [base+index]
11817 The first and last cases may be avoidable by explicitly coding the zero
11818 into the memory address, but I don't have an AMD-K6 machine handy to
11819 check this theory. */
11821 if (TARGET_K6
11822 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11823 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11824 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11825 cost += 10;
11827 return cost;
11830 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11831 this is used to form addresses to local data when -fPIC is in
11832 use. */
11834 static bool
11835 darwin_local_data_pic (rtx disp)
11837 return (GET_CODE (disp) == UNSPEC
11838 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11841 /* Determine if a given RTX is a valid constant. We already know this
11842 satisfies CONSTANT_P. */
11844 bool
11845 legitimate_constant_p (rtx x)
11847 switch (GET_CODE (x))
11849 case CONST:
11850 x = XEXP (x, 0);
11852 if (GET_CODE (x) == PLUS)
11854 if (!CONST_INT_P (XEXP (x, 1)))
11855 return false;
11856 x = XEXP (x, 0);
11859 if (TARGET_MACHO && darwin_local_data_pic (x))
11860 return true;
11862 /* Only some unspecs are valid as "constants". */
11863 if (GET_CODE (x) == UNSPEC)
11864 switch (XINT (x, 1))
11866 case UNSPEC_GOT:
11867 case UNSPEC_GOTOFF:
11868 case UNSPEC_PLTOFF:
11869 return TARGET_64BIT;
11870 case UNSPEC_TPOFF:
11871 case UNSPEC_NTPOFF:
11872 x = XVECEXP (x, 0, 0);
11873 return (GET_CODE (x) == SYMBOL_REF
11874 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11875 case UNSPEC_DTPOFF:
11876 x = XVECEXP (x, 0, 0);
11877 return (GET_CODE (x) == SYMBOL_REF
11878 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11879 default:
11880 return false;
11883 /* We must have drilled down to a symbol. */
11884 if (GET_CODE (x) == LABEL_REF)
11885 return true;
11886 if (GET_CODE (x) != SYMBOL_REF)
11887 return false;
11888 /* FALLTHRU */
11890 case SYMBOL_REF:
11891 /* TLS symbols are never valid. */
11892 if (SYMBOL_REF_TLS_MODEL (x))
11893 return false;
11895 /* DLLIMPORT symbols are never valid. */
11896 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11897 && SYMBOL_REF_DLLIMPORT_P (x))
11898 return false;
11900 #if TARGET_MACHO
11901 /* mdynamic-no-pic */
11902 if (MACHO_DYNAMIC_NO_PIC_P)
11903 return machopic_symbol_defined_p (x);
11904 #endif
11905 break;
11907 case CONST_DOUBLE:
11908 if (GET_MODE (x) == TImode
11909 && x != CONST0_RTX (TImode)
11910 && !TARGET_64BIT)
11911 return false;
11912 break;
11914 case CONST_VECTOR:
11915 if (!standard_sse_constant_p (x))
11916 return false;
11918 default:
11919 break;
11922 /* Otherwise we handle everything else in the move patterns. */
11923 return true;
11926 /* Determine if it's legal to put X into the constant pool. This
11927 is not possible for the address of thread-local symbols, which
11928 is checked above. */
11930 static bool
11931 ix86_cannot_force_const_mem (rtx x)
11933 /* We can always put integral constants and vectors in memory. */
11934 switch (GET_CODE (x))
11936 case CONST_INT:
11937 case CONST_DOUBLE:
11938 case CONST_VECTOR:
11939 return false;
11941 default:
11942 break;
11944 return !legitimate_constant_p (x);
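
A small sketch, illustrative only: the address of a thread-local variable fails the TLS check in legitimate_constant_p above, so ix86_cannot_force_const_mem also refuses to put it in the constant pool; it has to be computed at run time via one of the TLS sequences handled later in this file.

/* Hypothetical example: &tls_counter is not a link-time constant.  */
__thread int tls_counter;

int *
tls_counter_addr (void)
{
  return &tls_counter;
}
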
11948 /* Nonzero if the constant value X is a legitimate general operand
11949 when generating PIC code. It is given that flag_pic is on and
11950 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11952 bool
11953 legitimate_pic_operand_p (rtx x)
11955 rtx inner;
11957 switch (GET_CODE (x))
11959 case CONST:
11960 inner = XEXP (x, 0);
11961 if (GET_CODE (inner) == PLUS
11962 && CONST_INT_P (XEXP (inner, 1)))
11963 inner = XEXP (inner, 0);
11965 /* Only some unspecs are valid as "constants". */
11966 if (GET_CODE (inner) == UNSPEC)
11967 switch (XINT (inner, 1))
11969 case UNSPEC_GOT:
11970 case UNSPEC_GOTOFF:
11971 case UNSPEC_PLTOFF:
11972 return TARGET_64BIT;
11973 case UNSPEC_TPOFF:
11974 x = XVECEXP (inner, 0, 0);
11975 return (GET_CODE (x) == SYMBOL_REF
11976 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11977 case UNSPEC_MACHOPIC_OFFSET:
11978 return legitimate_pic_address_disp_p (x);
11979 default:
11980 return false;
11982 /* FALLTHRU */
11984 case SYMBOL_REF:
11985 case LABEL_REF:
11986 return legitimate_pic_address_disp_p (x);
11988 default:
11989 return true;
11993 /* Determine if a given CONST RTX is a valid memory displacement
11994 in PIC mode. */
11996 bool
11997 legitimate_pic_address_disp_p (rtx disp)
11999 bool saw_plus;
12001 /* In 64bit mode we can allow direct addresses of symbols and labels
12002 when they are not dynamic symbols. */
12003 if (TARGET_64BIT)
12005 rtx op0 = disp, op1;
12007 switch (GET_CODE (disp))
12009 case LABEL_REF:
12010 return true;
12012 case CONST:
12013 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12014 break;
12015 op0 = XEXP (XEXP (disp, 0), 0);
12016 op1 = XEXP (XEXP (disp, 0), 1);
12017 if (!CONST_INT_P (op1)
12018 || INTVAL (op1) >= 16*1024*1024
12019 || INTVAL (op1) < -16*1024*1024)
12020 break;
12021 if (GET_CODE (op0) == LABEL_REF)
12022 return true;
12023 if (GET_CODE (op0) != SYMBOL_REF)
12024 break;
12025 /* FALLTHRU */
12027 case SYMBOL_REF:
12028 /* TLS references should always be enclosed in UNSPEC. */
12029 if (SYMBOL_REF_TLS_MODEL (op0))
12030 return false;
12031 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
12032 && ix86_cmodel != CM_LARGE_PIC)
12033 return true;
12034 break;
12036 default:
12037 break;
12040 if (GET_CODE (disp) != CONST)
12041 return false;
12042 disp = XEXP (disp, 0);
12044 if (TARGET_64BIT)
12046 /* It is not safe to allow PLUS expressions. This limits the allowed
12047 distance of GOT tables. We should not need these anyway. */
12048 if (GET_CODE (disp) != UNSPEC
12049 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12050 && XINT (disp, 1) != UNSPEC_GOTOFF
12051 && XINT (disp, 1) != UNSPEC_PCREL
12052 && XINT (disp, 1) != UNSPEC_PLTOFF))
12053 return false;
12055 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12056 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12057 return false;
12058 return true;
12061 saw_plus = false;
12062 if (GET_CODE (disp) == PLUS)
12064 if (!CONST_INT_P (XEXP (disp, 1)))
12065 return false;
12066 disp = XEXP (disp, 0);
12067 saw_plus = true;
12070 if (TARGET_MACHO && darwin_local_data_pic (disp))
12071 return true;
12073 if (GET_CODE (disp) != UNSPEC)
12074 return false;
12076 switch (XINT (disp, 1))
12078 case UNSPEC_GOT:
12079 if (saw_plus)
12080 return false;
12081 /* We need to check for both symbols and labels because VxWorks loads
12082 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12083 details. */
12084 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12085 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12086 case UNSPEC_GOTOFF:
12087 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12088 While the ABI also specifies a 32bit relocation, we don't produce
12089 it in the small PIC model at all. */
12090 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12091 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12092 && !TARGET_64BIT)
12093 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12094 return false;
12095 case UNSPEC_GOTTPOFF:
12096 case UNSPEC_GOTNTPOFF:
12097 case UNSPEC_INDNTPOFF:
12098 if (saw_plus)
12099 return false;
12100 disp = XVECEXP (disp, 0, 0);
12101 return (GET_CODE (disp) == SYMBOL_REF
12102 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12103 case UNSPEC_NTPOFF:
12104 disp = XVECEXP (disp, 0, 0);
12105 return (GET_CODE (disp) == SYMBOL_REF
12106 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12107 case UNSPEC_DTPOFF:
12108 disp = XVECEXP (disp, 0, 0);
12109 return (GET_CODE (disp) == SYMBOL_REF
12110 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12113 return false;
12116 /* Recognizes RTL expressions that are valid memory addresses for an
12117 instruction. The MODE argument is the machine mode for the MEM
12118 expression that wants to use this address.
12120 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12121 convert common non-canonical forms to canonical form so that they will
12122 be recognized. */
12124 static bool
12125 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12126 rtx addr, bool strict)
12128 struct ix86_address parts;
12129 rtx base, index, disp;
12130 HOST_WIDE_INT scale;
12132 if (ix86_decompose_address (addr, &parts) <= 0)
12133 /* Decomposition failed. */
12134 return false;
12136 base = parts.base;
12137 index = parts.index;
12138 disp = parts.disp;
12139 scale = parts.scale;
12141 /* Validate base register.
12143 Don't allow SUBREGs that span more than a word here. It can lead to spill
12144 failures when the base is one word out of a two word structure, which is
12145 represented internally as a DImode int. */
12147 if (base)
12149 rtx reg;
12151 if (REG_P (base))
12152 reg = base;
12153 else if (GET_CODE (base) == SUBREG
12154 && REG_P (SUBREG_REG (base))
12155 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
12156 <= UNITS_PER_WORD)
12157 reg = SUBREG_REG (base);
12158 else
12159 /* Base is not a register. */
12160 return false;
12162 if (GET_MODE (base) != Pmode)
12163 /* Base is not in Pmode. */
12164 return false;
12166 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12167 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12168 /* Base is not valid. */
12169 return false;
12172 /* Validate index register.
12174 Don't allow SUBREGs that span more than a word here -- same as above. */
12176 if (index)
12178 rtx reg;
12180 if (REG_P (index))
12181 reg = index;
12182 else if (GET_CODE (index) == SUBREG
12183 && REG_P (SUBREG_REG (index))
12184 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
12185 <= UNITS_PER_WORD)
12186 reg = SUBREG_REG (index);
12187 else
12188 /* Index is not a register. */
12189 return false;
12191 if (GET_MODE (index) != Pmode)
12192 /* Index is not in Pmode. */
12193 return false;
12195 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12196 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12197 /* Index is not valid. */
12198 return false;
12201 /* Validate scale factor. */
12202 if (scale != 1)
12204 if (!index)
12205 /* Scale without index. */
12206 return false;
12208 if (scale != 2 && scale != 4 && scale != 8)
12209 /* Scale is not a valid multiplier. */
12210 return false;
12213 /* Validate displacement. */
12214 if (disp)
12216 if (GET_CODE (disp) == CONST
12217 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12218 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12219 switch (XINT (XEXP (disp, 0), 1))
12221 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12222 used. While the ABI also specifies 32bit relocations, we don't
12223 produce them at all and use IP-relative addressing instead. */
12224 case UNSPEC_GOT:
12225 case UNSPEC_GOTOFF:
12226 gcc_assert (flag_pic);
12227 if (!TARGET_64BIT)
12228 goto is_legitimate_pic;
12230 /* 64bit address unspec. */
12231 return false;
12233 case UNSPEC_GOTPCREL:
12234 case UNSPEC_PCREL:
12235 gcc_assert (flag_pic);
12236 goto is_legitimate_pic;
12238 case UNSPEC_GOTTPOFF:
12239 case UNSPEC_GOTNTPOFF:
12240 case UNSPEC_INDNTPOFF:
12241 case UNSPEC_NTPOFF:
12242 case UNSPEC_DTPOFF:
12243 break;
12245 case UNSPEC_STACK_CHECK:
12246 gcc_assert (flag_split_stack);
12247 break;
12249 default:
12250 /* Invalid address unspec. */
12251 return false;
12254 else if (SYMBOLIC_CONST (disp)
12255 && (flag_pic
12256 || (TARGET_MACHO
12257 #if TARGET_MACHO
12258 && MACHOPIC_INDIRECT
12259 && !machopic_operand_p (disp)
12260 #endif
12264 is_legitimate_pic:
12265 if (TARGET_64BIT && (index || base))
12267 /* foo@dtpoff(%rX) is ok. */
12268 if (GET_CODE (disp) != CONST
12269 || GET_CODE (XEXP (disp, 0)) != PLUS
12270 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12271 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12272 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12273 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12274 /* Non-constant pic memory reference. */
12275 return false;
12277 else if ((!TARGET_MACHO || flag_pic)
12278 && ! legitimate_pic_address_disp_p (disp))
12279 /* Displacement is an invalid pic construct. */
12280 return false;
12281 #if TARGET_MACHO
12282 else if (MACHO_DYNAMIC_NO_PIC_P && !legitimate_constant_p (disp))
12283 /* Displacement must be referenced via a non_lazy_pointer. */
12284 return false;
12285 #endif
12287 /* This code used to verify that a symbolic pic displacement
12288 includes the pic_offset_table_rtx register.
12290 While this is a good idea, unfortunately these constructs may
12291 be created by the "adds using lea" optimization for incorrect
12292 code like:
12294 int a;
12295 int foo(int i)
12297 return *(&a+i);
12300 This code is nonsensical, but results in addressing the
12301 GOT table with pic_offset_table_rtx as the base. We can't
12302 just refuse it easily, since it gets matched by the
12303 "addsi3" pattern, which later gets split to lea in case
12304 the output register differs from the input. While this
12305 could be handled by a separate addsi pattern for this case
12306 that never results in lea, disabling this test seems to be
12307 the easier and correct fix for the crash. */
12309 else if (GET_CODE (disp) != LABEL_REF
12310 && !CONST_INT_P (disp)
12311 && (GET_CODE (disp) != CONST
12312 || !legitimate_constant_p (disp))
12313 && (GET_CODE (disp) != SYMBOL_REF
12314 || !legitimate_constant_p (disp)))
12315 /* Displacement is not constant. */
12316 return false;
12317 else if (TARGET_64BIT
12318 && !x86_64_immediate_operand (disp, VOIDmode))
12319 /* Displacement is out of range. */
12320 return false;
12323 /* Everything looks valid. */
12324 return true;
12327 /* Determine if a given RTX is a valid constant address. */
12329 bool
12330 constant_address_p (rtx x)
12332 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12335 /* Return a unique alias set for the GOT. */
12337 static alias_set_type
12338 ix86_GOT_alias_set (void)
12340 static alias_set_type set = -1;
12341 if (set == -1)
12342 set = new_alias_set ();
12343 return set;
12346 /* Return a legitimate reference for ORIG (an address) using the
12347 register REG. If REG is 0, a new pseudo is generated.
12349 There are two types of references that must be handled:
12351 1. Global data references must load the address from the GOT, via
12352 the PIC reg. An insn is emitted to do this load, and the reg is
12353 returned.
12355 2. Static data references, constant pool addresses, and code labels
12356 compute the address as an offset from the GOT, whose base is in
12357 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12358 differentiate them from global data objects. The returned
12359 address is the PIC reg + an unspec constant.
12361 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12362 reg also appears in the address. */
12364 static rtx
12365 legitimize_pic_address (rtx orig, rtx reg)
12367 rtx addr = orig;
12368 rtx new_rtx = orig;
12369 rtx base;
12371 #if TARGET_MACHO
12372 if (TARGET_MACHO && !TARGET_64BIT)
12374 if (reg == 0)
12375 reg = gen_reg_rtx (Pmode);
12376 /* Use the generic Mach-O PIC machinery. */
12377 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12379 #endif
12381 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12382 new_rtx = addr;
12383 else if (TARGET_64BIT
12384 && ix86_cmodel != CM_SMALL_PIC
12385 && gotoff_operand (addr, Pmode))
12387 rtx tmpreg;
12388 /* This symbol may be referenced via a displacement from the PIC
12389 base address (@GOTOFF). */
12391 if (reload_in_progress)
12392 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12393 if (GET_CODE (addr) == CONST)
12394 addr = XEXP (addr, 0);
12395 if (GET_CODE (addr) == PLUS)
12397 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12398 UNSPEC_GOTOFF);
12399 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12401 else
12402 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12403 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12404 if (!reg)
12405 tmpreg = gen_reg_rtx (Pmode);
12406 else
12407 tmpreg = reg;
12408 emit_move_insn (tmpreg, new_rtx);
12410 if (reg != 0)
12412 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12413 tmpreg, 1, OPTAB_DIRECT);
12414 new_rtx = reg;
12416 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12418 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12420 /* This symbol may be referenced via a displacement from the PIC
12421 base address (@GOTOFF). */
12423 if (reload_in_progress)
12424 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12425 if (GET_CODE (addr) == CONST)
12426 addr = XEXP (addr, 0);
12427 if (GET_CODE (addr) == PLUS)
12429 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12430 UNSPEC_GOTOFF);
12431 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12433 else
12434 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12435 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12436 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12438 if (reg != 0)
12440 emit_move_insn (reg, new_rtx);
12441 new_rtx = reg;
12444 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12445 /* We can't use @GOTOFF for text labels on VxWorks;
12446 see gotoff_operand. */
12447 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12449 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12451 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12452 return legitimize_dllimport_symbol (addr, true);
12453 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12454 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12455 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12457 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12458 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12462 /* For x64 PE-COFF there is no GOT table, so we use the address
12463 directly. */
12464 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12466 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12467 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12469 if (reg == 0)
12470 reg = gen_reg_rtx (Pmode);
12471 emit_move_insn (reg, new_rtx);
12472 new_rtx = reg;
12474 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12476 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12477 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12478 new_rtx = gen_const_mem (Pmode, new_rtx);
12479 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12481 if (reg == 0)
12482 reg = gen_reg_rtx (Pmode);
12483 /* Use gen_movsi directly, otherwise the address is loaded
12484 into a register for CSE. We don't want to CSE these addresses;
12485 instead we CSE addresses from the GOT table, so skip this. */
12486 emit_insn (gen_movsi (reg, new_rtx));
12487 new_rtx = reg;
12489 else
12491 /* This symbol must be referenced via a load from the
12492 Global Offset Table (@GOT). */
12494 if (reload_in_progress)
12495 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12496 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12497 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12498 if (TARGET_64BIT)
12499 new_rtx = force_reg (Pmode, new_rtx);
12500 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12501 new_rtx = gen_const_mem (Pmode, new_rtx);
12502 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12504 if (reg == 0)
12505 reg = gen_reg_rtx (Pmode);
12506 emit_move_insn (reg, new_rtx);
12507 new_rtx = reg;
12510 else
12512 if (CONST_INT_P (addr)
12513 && !x86_64_immediate_operand (addr, VOIDmode))
12515 if (reg)
12517 emit_move_insn (reg, addr);
12518 new_rtx = reg;
12520 else
12521 new_rtx = force_reg (Pmode, addr);
12523 else if (GET_CODE (addr) == CONST)
12525 addr = XEXP (addr, 0);
12527 /* We must match stuff we generate before. Assume the only
12528 unspecs that can get here are ours. Not that we could do
12529 anything with them anyway.... */
12530 if (GET_CODE (addr) == UNSPEC
12531 || (GET_CODE (addr) == PLUS
12532 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12533 return orig;
12534 gcc_assert (GET_CODE (addr) == PLUS);
12536 if (GET_CODE (addr) == PLUS)
12538 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12540 /* Check first to see if this is a constant offset from a @GOTOFF
12541 symbol reference. */
12542 if (gotoff_operand (op0, Pmode)
12543 && CONST_INT_P (op1))
12545 if (!TARGET_64BIT)
12547 if (reload_in_progress)
12548 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12549 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12550 UNSPEC_GOTOFF);
12551 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12552 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12553 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12555 if (reg != 0)
12557 emit_move_insn (reg, new_rtx);
12558 new_rtx = reg;
12561 else
12563 if (INTVAL (op1) < -16*1024*1024
12564 || INTVAL (op1) >= 16*1024*1024)
12566 if (!x86_64_immediate_operand (op1, Pmode))
12567 op1 = force_reg (Pmode, op1);
12568 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12572 else
12574 base = legitimize_pic_address (XEXP (addr, 0), reg);
12575 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12576 base == reg ? NULL_RTX : reg);
12578 if (CONST_INT_P (new_rtx))
12579 new_rtx = plus_constant (base, INTVAL (new_rtx));
12580 else
12582 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12584 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12585 new_rtx = XEXP (new_rtx, 1);
12587 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12592 return new_rtx;
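
An illustrative sketch, not taken from the sources, of the two reference kinds the header comment above describes: with -fPIC on 32-bit ELF the exported global is typically reached through a load from name@GOT(%ebx), while the file-local variable is addressed as pic-reg + name@GOTOFF.

/* Hypothetical example; compile with: gcc -m32 -fPIC -O2 -S pic.c  */
int exported_counter;              /* global: loaded via @GOT        */
static int file_local_counter;     /* local:  pic reg + @GOTOFF      */

int
sum_counters (void)
{
  return exported_counter + file_local_counter;
}
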
12595 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12597 static rtx
12598 get_thread_pointer (int to_reg)
12600 rtx tp, reg, insn;
12602 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12603 if (!to_reg)
12604 return tp;
12606 reg = gen_reg_rtx (Pmode);
12607 insn = gen_rtx_SET (VOIDmode, reg, tp);
12608 insn = emit_insn (insn);
12610 return reg;
12613 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12614 false if we expect this to be used for a memory address and true if
12615 we expect to load the address into a register. */
12617 static rtx
12618 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
12620 rtx dest, base, off, pic, tp;
12621 int type;
12623 switch (model)
12625 case TLS_MODEL_GLOBAL_DYNAMIC:
12626 dest = gen_reg_rtx (Pmode);
12627 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
12629 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
12631 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12633 start_sequence ();
12634 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
12635 insns = get_insns ();
12636 end_sequence ();
12638 RTL_CONST_CALL_P (insns) = 1;
12639 emit_libcall_block (insns, dest, rax, x);
12641 else if (TARGET_64BIT && TARGET_GNU2_TLS)
12642 emit_insn (gen_tls_global_dynamic_64 (dest, x));
12643 else
12644 emit_insn (gen_tls_global_dynamic_32 (dest, x));
12646 if (TARGET_GNU2_TLS)
12648 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12650 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12652 break;
12654 case TLS_MODEL_LOCAL_DYNAMIC:
12655 base = gen_reg_rtx (Pmode);
12656 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
12658 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
12660 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
12662 start_sequence ();
12663 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
12664 insns = get_insns ();
12665 end_sequence ();
12667 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
12668 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
12669 RTL_CONST_CALL_P (insns) = 1;
12670 emit_libcall_block (insns, base, rax, note);
12672 else if (TARGET_64BIT && TARGET_GNU2_TLS)
12673 emit_insn (gen_tls_local_dynamic_base_64 (base));
12674 else
12675 emit_insn (gen_tls_local_dynamic_base_32 (base));
12677 if (TARGET_GNU2_TLS)
12679 rtx x = ix86_tls_module_base ();
12681 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12682 gen_rtx_MINUS (Pmode, x, tp));
12685 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12686 off = gen_rtx_CONST (Pmode, off);
12688 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12690 if (TARGET_GNU2_TLS)
12692 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12694 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12697 break;
12699 case TLS_MODEL_INITIAL_EXEC:
12700 if (TARGET_64BIT)
12702 if (TARGET_SUN_TLS)
12704 /* The Sun linker took the AMD64 TLS spec literally
12705 and can only handle %rax as the destination of the
12706 initial-exec code sequence. */
12708 dest = gen_reg_rtx (Pmode);
12709 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12710 return dest;
12713 pic = NULL;
12714 type = UNSPEC_GOTNTPOFF;
12716 else if (flag_pic)
12718 if (reload_in_progress)
12719 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12720 pic = pic_offset_table_rtx;
12721 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12723 else if (!TARGET_ANY_GNU_TLS)
12725 pic = gen_reg_rtx (Pmode);
12726 emit_insn (gen_set_got (pic));
12727 type = UNSPEC_GOTTPOFF;
12729 else
12731 pic = NULL;
12732 type = UNSPEC_INDNTPOFF;
12735 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12736 off = gen_rtx_CONST (Pmode, off);
12737 if (pic)
12738 off = gen_rtx_PLUS (Pmode, pic, off);
12739 off = gen_const_mem (Pmode, off);
12740 set_mem_alias_set (off, ix86_GOT_alias_set ());
12742 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12744 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12745 off = force_reg (Pmode, off);
12746 return gen_rtx_PLUS (Pmode, base, off);
12748 else
12750 base = get_thread_pointer (true);
12751 dest = gen_reg_rtx (Pmode);
12752 emit_insn (gen_subsi3 (dest, base, off));
12754 break;
12756 case TLS_MODEL_LOCAL_EXEC:
12757 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12758 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12759 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12760 off = gen_rtx_CONST (Pmode, off);
12762 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12764 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12765 return gen_rtx_PLUS (Pmode, base, off);
12767 else
12769 base = get_thread_pointer (true);
12770 dest = gen_reg_rtx (Pmode);
12771 emit_insn (gen_subsi3 (dest, base, off));
12773 break;
12775 default:
12776 gcc_unreachable ();
12779 return dest;
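
A brief sketch (assumptions noted in the comments, not from the file) of source code whose accesses flow through the TLS models dispatched above; which case is taken depends on -fpic, -ftls-model, and whether the symbol binds locally.

/* Hypothetical example: typical model selection, which the linker may
   still relax further.  */
__thread int local_tls;            /* non-PIC executable: local-exec       */
extern __thread int extern_tls;    /* PIC code: initial-exec or
                                      global-dynamic, per -ftls-model      */

int
read_tls (void)
{
  return local_tls + extern_tls;
}
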
12782 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12783 to symbol DECL. */
12785 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12786 htab_t dllimport_map;
12788 static tree
12789 get_dllimport_decl (tree decl)
12791 struct tree_map *h, in;
12792 void **loc;
12793 const char *name;
12794 const char *prefix;
12795 size_t namelen, prefixlen;
12796 char *imp_name;
12797 tree to;
12798 rtx rtl;
12800 if (!dllimport_map)
12801 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12803 in.hash = htab_hash_pointer (decl);
12804 in.base.from = decl;
12805 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12806 h = (struct tree_map *) *loc;
12807 if (h)
12808 return h->to;
12810 *loc = h = ggc_alloc_tree_map ();
12811 h->hash = in.hash;
12812 h->base.from = decl;
12813 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12814 VAR_DECL, NULL, ptr_type_node);
12815 DECL_ARTIFICIAL (to) = 1;
12816 DECL_IGNORED_P (to) = 1;
12817 DECL_EXTERNAL (to) = 1;
12818 TREE_READONLY (to) = 1;
12820 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12821 name = targetm.strip_name_encoding (name);
12822 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12823 ? "*__imp_" : "*__imp__";
12824 namelen = strlen (name);
12825 prefixlen = strlen (prefix);
12826 imp_name = (char *) alloca (namelen + prefixlen + 1);
12827 memcpy (imp_name, prefix, prefixlen);
12828 memcpy (imp_name + prefixlen, name, namelen + 1);
12830 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12831 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12832 SET_SYMBOL_REF_DECL (rtl, to);
12833 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12835 rtl = gen_const_mem (Pmode, rtl);
12836 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12838 SET_DECL_RTL (to, rtl);
12839 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12841 return to;
12844 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12845 true if we require the result be a register. */
12847 static rtx
12848 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12850 tree imp_decl;
12851 rtx x;
12853 gcc_assert (SYMBOL_REF_DECL (symbol));
12854 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12856 x = DECL_RTL (imp_decl);
12857 if (want_reg)
12858 x = force_reg (Pmode, x);
12859 return x;
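
A minimal sketch for Windows targets (mingw/Cygwin assumed; not part of the file): a reference to a dllimport'ed object is rewritten by the routines above into a load through the synthesized "__imp_" symbol.

/* Hypothetical example: the load below goes through __imp__shared_count
   (or __imp_shared_count on targets without the user-label underscore).  */
__attribute__ ((dllimport)) extern int shared_count;

int
read_shared_count (void)
{
  return shared_count;
}
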
12862 /* Try machine-dependent ways of modifying an illegitimate address
12863 to be legitimate. If we find one, return the new, valid address.
12864 This macro is used in only one place: `memory_address' in explow.c.
12866 OLDX is the address as it was before break_out_memory_refs was called.
12867 In some cases it is useful to look at this to decide what needs to be done.
12869 It is always safe for this macro to do nothing. It exists to recognize
12870 opportunities to optimize the output.
12872 For the 80386, we handle X+REG by loading X into a register R and
12873 using R+REG. R will go in a general reg and indexing will be used.
12874 However, if REG is a broken-out memory address or multiplication,
12875 nothing needs to be done because REG can certainly go in a general reg.
12877 When -fpic is used, special handling is needed for symbolic references.
12878 See comments by legitimize_pic_address in i386.c for details. */
12880 static rtx
12881 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12882 enum machine_mode mode)
12884 int changed = 0;
12885 unsigned log;
12887 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12888 if (log)
12889 return legitimize_tls_address (x, (enum tls_model) log, false);
12890 if (GET_CODE (x) == CONST
12891 && GET_CODE (XEXP (x, 0)) == PLUS
12892 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12893 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12895 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12896 (enum tls_model) log, false);
12897 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12900 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12902 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12903 return legitimize_dllimport_symbol (x, true);
12904 if (GET_CODE (x) == CONST
12905 && GET_CODE (XEXP (x, 0)) == PLUS
12906 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12907 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12909 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12910 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12914 if (flag_pic && SYMBOLIC_CONST (x))
12915 return legitimize_pic_address (x, 0);
12917 #if TARGET_MACHO
12918 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12919 return machopic_indirect_data_reference (x, 0);
12920 #endif
12922 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
12923 if (GET_CODE (x) == ASHIFT
12924 && CONST_INT_P (XEXP (x, 1))
12925 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12927 changed = 1;
12928 log = INTVAL (XEXP (x, 1));
12929 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12930 GEN_INT (1 << log));
12933 if (GET_CODE (x) == PLUS)
12935 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12937 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12938 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12939 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12941 changed = 1;
12942 log = INTVAL (XEXP (XEXP (x, 0), 1));
12943 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12944 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12945 GEN_INT (1 << log));
12948 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12949 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12950 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12952 changed = 1;
12953 log = INTVAL (XEXP (XEXP (x, 1), 1));
12954 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12955 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12956 GEN_INT (1 << log));
12959 /* Put multiply first if it isn't already. */
12960 if (GET_CODE (XEXP (x, 1)) == MULT)
12962 rtx tmp = XEXP (x, 0);
12963 XEXP (x, 0) = XEXP (x, 1);
12964 XEXP (x, 1) = tmp;
12965 changed = 1;
12968 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12969 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12970 created by virtual register instantiation, register elimination, and
12971 similar optimizations. */
12972 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12974 changed = 1;
12975 x = gen_rtx_PLUS (Pmode,
12976 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12977 XEXP (XEXP (x, 1), 0)),
12978 XEXP (XEXP (x, 1), 1));
12981 /* Canonicalize
12982 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12983 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12984 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12985 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12986 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12987 && CONSTANT_P (XEXP (x, 1)))
12989 rtx constant;
12990 rtx other = NULL_RTX;
12992 if (CONST_INT_P (XEXP (x, 1)))
12994 constant = XEXP (x, 1);
12995 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12997 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12999 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13000 other = XEXP (x, 1);
13002 else
13003 constant = 0;
13005 if (constant)
13007 changed = 1;
13008 x = gen_rtx_PLUS (Pmode,
13009 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13010 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13011 plus_constant (other, INTVAL (constant)));
13015 if (changed && ix86_legitimate_address_p (mode, x, false))
13016 return x;
13018 if (GET_CODE (XEXP (x, 0)) == MULT)
13020 changed = 1;
13021 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13024 if (GET_CODE (XEXP (x, 1)) == MULT)
13026 changed = 1;
13027 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
13030 if (changed
13031 && REG_P (XEXP (x, 1))
13032 && REG_P (XEXP (x, 0)))
13033 return x;
13035 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13037 changed = 1;
13038 x = legitimize_pic_address (x, 0);
13041 if (changed && ix86_legitimate_address_p (mode, x, false))
13042 return x;
13044 if (REG_P (XEXP (x, 0)))
13046 rtx temp = gen_reg_rtx (Pmode);
13047 rtx val = force_operand (XEXP (x, 1), temp);
13048 if (val != temp)
13049 emit_move_insn (temp, val);
13051 XEXP (x, 1) = temp;
13052 return x;
13055 else if (REG_P (XEXP (x, 1)))
13057 rtx temp = gen_reg_rtx (Pmode);
13058 rtx val = force_operand (XEXP (x, 0), temp);
13059 if (val != temp)
13060 emit_move_insn (temp, val);
13062 XEXP (x, 0) = temp;
13063 return x;
13067 return x;
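/* For illustration (register numbers and modes below are arbitrary):
   given the address

       (plus:SI (ashift:SI (reg:SI 60) (const_int 2)) (reg:SI 61))

   the code above rewrites the shift into a multiply,

       (plus:SI (mult:SI (reg:SI 60) (const_int 4)) (reg:SI 61))

   which matches the base + index*scale form that
   ix86_legitimate_address_p accepts for the SIB addressing modes.  */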
13070 /* Print an integer constant expression in assembler syntax. Addition
13071 and subtraction are the only arithmetic that may appear in these
13072 expressions. FILE is the stdio stream to write to, X is the rtx, and
13073 CODE is the operand print code from the output string. */
13075 static void
13076 output_pic_addr_const (FILE *file, rtx x, int code)
13078 char buf[256];
13080 switch (GET_CODE (x))
13082 case PC:
13083 gcc_assert (flag_pic);
13084 putc ('.', file);
13085 break;
13087 case SYMBOL_REF:
13088 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13089 output_addr_const (file, x);
13090 else
13092 const char *name = XSTR (x, 0);
13094 /* Mark the decl as referenced so that cgraph will
13095 output the function. */
13096 if (SYMBOL_REF_DECL (x))
13097 mark_decl_referenced (SYMBOL_REF_DECL (x));
13099 #if TARGET_MACHO
13100 if (MACHOPIC_INDIRECT
13101 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13102 name = machopic_indirection_name (x, /*stub_p=*/true);
13103 #endif
13104 assemble_name (file, name);
13106 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
13107 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
13108 fputs ("@PLT", file);
13109 break;
13111 case LABEL_REF:
13112 x = XEXP (x, 0);
13113 /* FALLTHRU */
13114 case CODE_LABEL:
13115 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13116 assemble_name (asm_out_file, buf);
13117 break;
13119 case CONST_INT:
13120 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13121 break;
13123 case CONST:
13124 /* This used to output parentheses around the expression,
13125 but that does not work on the 386 (either ATT or BSD assembler). */
13126 output_pic_addr_const (file, XEXP (x, 0), code);
13127 break;
13129 case CONST_DOUBLE:
13130 if (GET_MODE (x) == VOIDmode)
13132 /* We can use %d if the number is <32 bits and positive. */
13133 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
13134 fprintf (file, "0x%lx%08lx",
13135 (unsigned long) CONST_DOUBLE_HIGH (x),
13136 (unsigned long) CONST_DOUBLE_LOW (x));
13137 else
13138 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
13140 else
13141 /* We can't handle floating point constants;
13142 TARGET_PRINT_OPERAND must handle them. */
13143 output_operand_lossage ("floating constant misused");
13144 break;
13146 case PLUS:
13147 /* Some assemblers need integer constants to appear first. */
13148 if (CONST_INT_P (XEXP (x, 0)))
13150 output_pic_addr_const (file, XEXP (x, 0), code);
13151 putc ('+', file);
13152 output_pic_addr_const (file, XEXP (x, 1), code);
13154 else
13156 gcc_assert (CONST_INT_P (XEXP (x, 1)));
13157 output_pic_addr_const (file, XEXP (x, 1), code);
13158 putc ('+', file);
13159 output_pic_addr_const (file, XEXP (x, 0), code);
13161 break;
13163 case MINUS:
13164 if (!TARGET_MACHO)
13165 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13166 output_pic_addr_const (file, XEXP (x, 0), code);
13167 putc ('-', file);
13168 output_pic_addr_const (file, XEXP (x, 1), code);
13169 if (!TARGET_MACHO)
13170 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13171 break;
13173 case UNSPEC:
13174 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
13176 bool f = i386_asm_output_addr_const_extra (file, x);
13177 gcc_assert (f);
13178 break;
13181 gcc_assert (XVECLEN (x, 0) == 1);
13182 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13183 switch (XINT (x, 1))
13185 case UNSPEC_GOT:
13186 fputs ("@GOT", file);
13187 break;
13188 case UNSPEC_GOTOFF:
13189 fputs ("@GOTOFF", file);
13190 break;
13191 case UNSPEC_PLTOFF:
13192 fputs ("@PLTOFF", file);
13193 break;
13194 case UNSPEC_PCREL:
13195 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13196 "(%rip)" : "[rip]", file);
13197 break;
13198 case UNSPEC_GOTPCREL:
13199 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13200 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13201 break;
13202 case UNSPEC_GOTTPOFF:
13203 /* FIXME: This might be @TPOFF in Sun ld too. */
13204 fputs ("@gottpoff", file);
13205 break;
13206 case UNSPEC_TPOFF:
13207 fputs ("@tpoff", file);
13208 break;
13209 case UNSPEC_NTPOFF:
13210 if (TARGET_64BIT)
13211 fputs ("@tpoff", file);
13212 else
13213 fputs ("@ntpoff", file);
13214 break;
13215 case UNSPEC_DTPOFF:
13216 fputs ("@dtpoff", file);
13217 break;
13218 case UNSPEC_GOTNTPOFF:
13219 if (TARGET_64BIT)
13220 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13221 "@gottpoff(%rip)": "@gottpoff[rip]", file);
13222 else
13223 fputs ("@gotntpoff", file);
13224 break;
13225 case UNSPEC_INDNTPOFF:
13226 fputs ("@indntpoff", file);
13227 break;
13228 #if TARGET_MACHO
13229 case UNSPEC_MACHOPIC_OFFSET:
13230 putc ('-', file);
13231 machopic_output_function_base_name (file);
13232 break;
13233 #endif
13234 default:
13235 output_operand_lossage ("invalid UNSPEC as operand");
13236 break;
13238 break;
13240 default:
13241 output_operand_lossage ("invalid expression as operand");
13245 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13246 We need to emit DTP-relative relocations. */
13248 static void ATTRIBUTE_UNUSED
13249 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13251 fputs (ASM_LONG, file);
13252 output_addr_const (file, x);
13253 fputs ("@dtpoff", file);
13254 switch (size)
13256 case 4:
13257 break;
13258 case 8:
13259 fputs (", 0", file);
13260 break;
13261 default:
13262 gcc_unreachable ();
13266 /* Return true if X is a representation of the PIC register. This copes
13267 with calls from ix86_find_base_term, where the register might have
13268 been replaced by a cselib value. */
13270 static bool
13271 ix86_pic_register_p (rtx x)
13273 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13274 return (pic_offset_table_rtx
13275 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13276 else
13277 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13280 /* Helper function for ix86_delegitimize_address.
13281 Attempt to delegitimize TLS local-exec accesses. */
13283 static rtx
13284 ix86_delegitimize_tls_address (rtx orig_x)
13286 rtx x = orig_x, unspec;
13287 struct ix86_address addr;
13289 if (!TARGET_TLS_DIRECT_SEG_REFS)
13290 return orig_x;
13291 if (MEM_P (x))
13292 x = XEXP (x, 0);
13293 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13294 return orig_x;
13295 if (ix86_decompose_address (x, &addr) == 0
13296 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13297 || addr.disp == NULL_RTX
13298 || GET_CODE (addr.disp) != CONST)
13299 return orig_x;
13300 unspec = XEXP (addr.disp, 0);
13301 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13302 unspec = XEXP (unspec, 0);
13303 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13304 return orig_x;
13305 x = XVECEXP (unspec, 0, 0);
13306 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13307 if (unspec != XEXP (addr.disp, 0))
13308 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13309 if (addr.index)
13311 rtx idx = addr.index;
13312 if (addr.scale != 1)
13313 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13314 x = gen_rtx_PLUS (Pmode, idx, x);
13316 if (addr.base)
13317 x = gen_rtx_PLUS (Pmode, addr.base, x);
13318 if (MEM_P (orig_x))
13319 x = replace_equiv_address_nv (orig_x, x);
13320 return x;
13323 /* In the name of slightly smaller debug output, and to cater to
13324 general assembler lossage, recognize PIC+GOTOFF and turn it back
13325 into a direct symbol reference.
13327 On Darwin, this is necessary to avoid a crash, because Darwin
13328 has a different PIC label for each routine but the DWARF debugging
13329 information is not associated with any particular routine, so it's
13330 necessary to remove references to the PIC label from RTL stored by
13331 the DWARF output code. */
13333 static rtx
13334 ix86_delegitimize_address (rtx x)
13336 rtx orig_x = delegitimize_mem_from_attrs (x);
13337 /* addend is NULL or some rtx if x is something+GOTOFF where
13338 something doesn't include the PIC register. */
13339 rtx addend = NULL_RTX;
13340 /* reg_addend is NULL or a multiple of some register. */
13341 rtx reg_addend = NULL_RTX;
13342 /* const_addend is NULL or a const_int. */
13343 rtx const_addend = NULL_RTX;
13344 /* This is the result, or NULL. */
13345 rtx result = NULL_RTX;
13347 x = orig_x;
13349 if (MEM_P (x))
13350 x = XEXP (x, 0);
13352 if (TARGET_64BIT)
13354 if (GET_CODE (x) != CONST
13355 || GET_CODE (XEXP (x, 0)) != UNSPEC
13356 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13357 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13358 || !MEM_P (orig_x))
13359 return ix86_delegitimize_tls_address (orig_x);
13360 x = XVECEXP (XEXP (x, 0), 0, 0);
13361 if (GET_MODE (orig_x) != Pmode)
13363 x = simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
13364 if (x == NULL_RTX)
13365 return orig_x;
13367 return x;
13370 if (GET_CODE (x) != PLUS
13371 || GET_CODE (XEXP (x, 1)) != CONST)
13372 return ix86_delegitimize_tls_address (orig_x);
13374 if (ix86_pic_register_p (XEXP (x, 0)))
13375 /* %ebx + GOT/GOTOFF */
13377 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13379 /* %ebx + %reg * scale + GOT/GOTOFF */
13380 reg_addend = XEXP (x, 0);
13381 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13382 reg_addend = XEXP (reg_addend, 1);
13383 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13384 reg_addend = XEXP (reg_addend, 0);
13385 else
13387 reg_addend = NULL_RTX;
13388 addend = XEXP (x, 0);
13391 else
13392 addend = XEXP (x, 0);
13394 x = XEXP (XEXP (x, 1), 0);
13395 if (GET_CODE (x) == PLUS
13396 && CONST_INT_P (XEXP (x, 1)))
13398 const_addend = XEXP (x, 1);
13399 x = XEXP (x, 0);
13402 if (GET_CODE (x) == UNSPEC
13403 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13404 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13405 result = XVECEXP (x, 0, 0);
13407 if (TARGET_MACHO && darwin_local_data_pic (x)
13408 && !MEM_P (orig_x))
13409 result = XVECEXP (x, 0, 0);
13411 if (! result)
13412 return ix86_delegitimize_tls_address (orig_x);
13414 if (const_addend)
13415 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13416 if (reg_addend)
13417 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13418 if (addend)
13420 /* If the rest of original X doesn't involve the PIC register, add
13421 addend and subtract pic_offset_table_rtx. This can happen e.g.
13422 for code like:
13423 leal (%ebx, %ecx, 4), %ecx
13425 movl foo@GOTOFF(%ecx), %edx
13426 in which case we return (%ecx - %ebx) + foo. */
13427 if (pic_offset_table_rtx)
13428 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13429 pic_offset_table_rtx),
13430 result);
13431 else
13432 return orig_x;
13434 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13436 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13437 if (result == NULL_RTX)
13438 return orig_x;
13440 return result;
13443 /* If X is a machine specific address (i.e. a symbol or label being
13444 referenced as a displacement from the GOT implemented using an
13445 UNSPEC), then return the base term. Otherwise return X. */
13447 rtx
13448 ix86_find_base_term (rtx x)
13450 rtx term;
13452 if (TARGET_64BIT)
13454 if (GET_CODE (x) != CONST)
13455 return x;
13456 term = XEXP (x, 0);
13457 if (GET_CODE (term) == PLUS
13458 && (CONST_INT_P (XEXP (term, 1))
13459 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13460 term = XEXP (term, 0);
13461 if (GET_CODE (term) != UNSPEC
13462 || (XINT (term, 1) != UNSPEC_GOTPCREL
13463 && XINT (term, 1) != UNSPEC_PCREL))
13464 return x;
13466 return XVECEXP (term, 0, 0);
13469 return ix86_delegitimize_address (x);
13472 static void
13473 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13474 int fp, FILE *file)
13476 const char *suffix;
13478 if (mode == CCFPmode || mode == CCFPUmode)
13480 code = ix86_fp_compare_code_to_integer (code);
13481 mode = CCmode;
13483 if (reverse)
13484 code = reverse_condition (code);
13486 switch (code)
13488 case EQ:
13489 switch (mode)
13491 case CCAmode:
13492 suffix = "a";
13493 break;
13495 case CCCmode:
13496 suffix = "c";
13497 break;
13499 case CCOmode:
13500 suffix = "o";
13501 break;
13503 case CCSmode:
13504 suffix = "s";
13505 break;
13507 default:
13508 suffix = "e";
13510 break;
13511 case NE:
13512 switch (mode)
13514 case CCAmode:
13515 suffix = "na";
13516 break;
13518 case CCCmode:
13519 suffix = "nc";
13520 break;
13522 case CCOmode:
13523 suffix = "no";
13524 break;
13526 case CCSmode:
13527 suffix = "ns";
13528 break;
13530 default:
13531 suffix = "ne";
13533 break;
13534 case GT:
13535 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13536 suffix = "g";
13537 break;
13538 case GTU:
13539 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13540 Those same assemblers have the same but opposite lossage on cmov. */
13541 if (mode == CCmode)
13542 suffix = fp ? "nbe" : "a";
13543 else if (mode == CCCmode)
13544 suffix = "b";
13545 else
13546 gcc_unreachable ();
13547 break;
13548 case LT:
13549 switch (mode)
13551 case CCNOmode:
13552 case CCGOCmode:
13553 suffix = "s";
13554 break;
13556 case CCmode:
13557 case CCGCmode:
13558 suffix = "l";
13559 break;
13561 default:
13562 gcc_unreachable ();
13564 break;
13565 case LTU:
13566 gcc_assert (mode == CCmode || mode == CCCmode);
13567 suffix = "b";
13568 break;
13569 case GE:
13570 switch (mode)
13572 case CCNOmode:
13573 case CCGOCmode:
13574 suffix = "ns";
13575 break;
13577 case CCmode:
13578 case CCGCmode:
13579 suffix = "ge";
13580 break;
13582 default:
13583 gcc_unreachable ();
13585 break;
13586 case GEU:
13587 /* ??? As above. */
13588 gcc_assert (mode == CCmode || mode == CCCmode);
13589 suffix = fp ? "nb" : "ae";
13590 break;
13591 case LE:
13592 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13593 suffix = "le";
13594 break;
13595 case LEU:
13596 /* ??? As above. */
13597 if (mode == CCmode)
13598 suffix = "be";
13599 else if (mode == CCCmode)
13600 suffix = fp ? "nb" : "ae";
13601 else
13602 gcc_unreachable ();
13603 break;
13604 case UNORDERED:
13605 suffix = fp ? "u" : "p";
13606 break;
13607 case ORDERED:
13608 suffix = fp ? "nu" : "np";
13609 break;
13610 default:
13611 gcc_unreachable ();
13613 fputs (suffix, file);
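/* For example, a signed comparison in CCGCmode with code GT yields the
   suffix "g" (so the caller emits "jg"/"setg"), and with REVERSE set it
   yields "le"; an unsigned GTU in CCmode yields "a", or "nbe" when FP
   is set, as noted above.  */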
13616 /* Print the name of register X to FILE based on its machine mode and number.
13617 If CODE is 'w', pretend the mode is HImode.
13618 If CODE is 'b', pretend the mode is QImode.
13619 If CODE is 'k', pretend the mode is SImode.
13620 If CODE is 'q', pretend the mode is DImode.
13621 If CODE is 'x', pretend the mode is V4SFmode.
13622 If CODE is 't', pretend the mode is V8SFmode.
13623 If CODE is 'h', pretend the reg is the 'high' byte register.
13624 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack reg.
13625 If CODE is 'd', duplicate the operand for an AVX instruction.
13628 void
13629 print_reg (rtx x, int code, FILE *file)
13631 const char *reg;
13632 bool duplicated = code == 'd' && TARGET_AVX;
13634 gcc_assert (x == pc_rtx
13635 || (REGNO (x) != ARG_POINTER_REGNUM
13636 && REGNO (x) != FRAME_POINTER_REGNUM
13637 && REGNO (x) != FLAGS_REG
13638 && REGNO (x) != FPSR_REG
13639 && REGNO (x) != FPCR_REG));
13641 if (ASSEMBLER_DIALECT == ASM_ATT)
13642 putc ('%', file);
13644 if (x == pc_rtx)
13646 gcc_assert (TARGET_64BIT);
13647 fputs ("rip", file);
13648 return;
13651 if (code == 'w' || MMX_REG_P (x))
13652 code = 2;
13653 else if (code == 'b')
13654 code = 1;
13655 else if (code == 'k')
13656 code = 4;
13657 else if (code == 'q')
13658 code = 8;
13659 else if (code == 'y')
13660 code = 3;
13661 else if (code == 'h')
13662 code = 0;
13663 else if (code == 'x')
13664 code = 16;
13665 else if (code == 't')
13666 code = 32;
13667 else
13668 code = GET_MODE_SIZE (GET_MODE (x));
13670 /* Irritatingly, AMD extended registers use a different naming convention
13671 from the normal registers. */
13672 if (REX_INT_REG_P (x))
13674 gcc_assert (TARGET_64BIT);
13675 switch (code)
13677 case 0:
13678 error ("extended registers have no high halves");
13679 break;
13680 case 1:
13681 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13682 break;
13683 case 2:
13684 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13685 break;
13686 case 4:
13687 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13688 break;
13689 case 8:
13690 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13691 break;
13692 default:
13693 error ("unsupported operand size for extended register");
13694 break;
13696 return;
13699 reg = NULL;
13700 switch (code)
13702 case 3:
13703 if (STACK_TOP_P (x))
13705 reg = "st(0)";
13706 break;
13708 /* FALLTHRU */
13709 case 8:
13710 case 4:
13711 case 12:
13712 if (! ANY_FP_REG_P (x))
13713 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13714 /* FALLTHRU */
13715 case 16:
13716 case 2:
13717 normal:
13718 reg = hi_reg_name[REGNO (x)];
13719 break;
13720 case 1:
13721 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13722 goto normal;
13723 reg = qi_reg_name[REGNO (x)];
13724 break;
13725 case 0:
13726 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13727 goto normal;
13728 reg = qi_high_reg_name[REGNO (x)];
13729 break;
13730 case 32:
13731 if (SSE_REG_P (x))
13733 gcc_assert (!duplicated);
13734 putc ('y', file);
13735 fputs (hi_reg_name[REGNO (x)] + 1, file);
13736 return;
13738 break;
13739 default:
13740 gcc_unreachable ();
13743 fputs (reg, file);
13744 if (duplicated)
13746 if (ASSEMBLER_DIALECT == ASM_ATT)
13747 fprintf (file, ", %%%s", reg);
13748 else
13749 fprintf (file, ", %s", reg);
13753 /* Locate some local-dynamic symbol still in use by this function
13754 so that we can print its name in some tls_local_dynamic_base
13755 pattern. */
13757 static int
13758 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13760 rtx x = *px;
13762 if (GET_CODE (x) == SYMBOL_REF
13763 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13765 cfun->machine->some_ld_name = XSTR (x, 0);
13766 return 1;
13769 return 0;
13772 static const char *
13773 get_some_local_dynamic_name (void)
13775 rtx insn;
13777 if (cfun->machine->some_ld_name)
13778 return cfun->machine->some_ld_name;
13780 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13781 if (NONDEBUG_INSN_P (insn)
13782 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13783 return cfun->machine->some_ld_name;
13785 return NULL;
13788 /* Meaning of CODE:
13789 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13790 C -- print opcode suffix for set/cmov insn.
13791 c -- like C, but print reversed condition
13792 F,f -- likewise, but for floating-point.
13793 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13794 otherwise nothing
13795 R -- print the prefix for register names.
13796 z -- print the opcode suffix for the size of the current operand.
13797 Z -- likewise, with special suffixes for x87 instructions.
13798 * -- print a star (in certain assembler syntax)
13799 A -- print an absolute memory reference.
13800 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13801 s -- print a shift double count, followed by the assembler's argument
13802 delimiter.
13803 b -- print the QImode name of the register for the indicated operand.
13804 %b0 would print %al if operands[0] is reg 0.
13805 w -- likewise, print the HImode name of the register.
13806 k -- likewise, print the SImode name of the register.
13807 q -- likewise, print the DImode name of the register.
13808 x -- likewise, print the V4SFmode name of the register.
13809 t -- likewise, print the V8SFmode name of the register.
13810 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13811 y -- print "st(0)" instead of "st" as a register.
13812 d -- print duplicated register operand for AVX instruction.
13813 D -- print condition for SSE cmp instruction.
13814 P -- if PIC, print an @PLT suffix.
13815 X -- don't print any sort of PIC '@' suffix for a symbol.
13816 & -- print some in-use local-dynamic symbol name.
13817 H -- print a memory address offset by 8; used for sse high-parts
13818 Y -- print condition for XOP pcom* instruction.
13819 + -- print a branch hint as 'cs' or 'ds' prefix
13820 ; -- print a semicolon (after prefixes due to bug in older gas).
13821 @ -- print a segment register of thread base pointer load
13824 void
13825 ix86_print_operand (FILE *file, rtx x, int code)
13827 if (code)
13829 switch (code)
13831 case '*':
13832 if (ASSEMBLER_DIALECT == ASM_ATT)
13833 putc ('*', file);
13834 return;
13836 case '&':
13838 const char *name = get_some_local_dynamic_name ();
13839 if (name == NULL)
13840 output_operand_lossage ("'%%&' used without any "
13841 "local dynamic TLS references");
13842 else
13843 assemble_name (file, name);
13844 return;
13847 case 'A':
13848 switch (ASSEMBLER_DIALECT)
13850 case ASM_ATT:
13851 putc ('*', file);
13852 break;
13854 case ASM_INTEL:
13855 /* Intel syntax. For absolute addresses, registers should not
13856 be surrounded by brackets. */
13857 if (!REG_P (x))
13859 putc ('[', file);
13860 ix86_print_operand (file, x, 0);
13861 putc (']', file);
13862 return;
13864 break;
13866 default:
13867 gcc_unreachable ();
13870 ix86_print_operand (file, x, 0);
13871 return;
13874 case 'L':
13875 if (ASSEMBLER_DIALECT == ASM_ATT)
13876 putc ('l', file);
13877 return;
13879 case 'W':
13880 if (ASSEMBLER_DIALECT == ASM_ATT)
13881 putc ('w', file);
13882 return;
13884 case 'B':
13885 if (ASSEMBLER_DIALECT == ASM_ATT)
13886 putc ('b', file);
13887 return;
13889 case 'Q':
13890 if (ASSEMBLER_DIALECT == ASM_ATT)
13891 putc ('l', file);
13892 return;
13894 case 'S':
13895 if (ASSEMBLER_DIALECT == ASM_ATT)
13896 putc ('s', file);
13897 return;
13899 case 'T':
13900 if (ASSEMBLER_DIALECT == ASM_ATT)
13901 putc ('t', file);
13902 return;
13904 case 'z':
13905 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13908 /* Opcodes don't get size suffixes if using Intel syntax. */
13908 if (ASSEMBLER_DIALECT == ASM_INTEL)
13909 return;
13911 switch (GET_MODE_SIZE (GET_MODE (x)))
13913 case 1:
13914 putc ('b', file);
13915 return;
13917 case 2:
13918 putc ('w', file);
13919 return;
13921 case 4:
13922 putc ('l', file);
13923 return;
13925 case 8:
13926 putc ('q', file);
13927 return;
13929 default:
13930 output_operand_lossage
13931 ("invalid operand size for operand code '%c'", code);
13932 return;
13936 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13937 warning
13938 (0, "non-integer operand used with operand code '%c'", code);
13939 /* FALLTHRU */
13941 case 'Z':
13942 /* 387 opcodes don't get size suffixes if using Intel syntax. */
13943 if (ASSEMBLER_DIALECT == ASM_INTEL)
13944 return;
13946 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13948 switch (GET_MODE_SIZE (GET_MODE (x)))
13950 case 2:
13951 #ifdef HAVE_AS_IX86_FILDS
13952 putc ('s', file);
13953 #endif
13954 return;
13956 case 4:
13957 putc ('l', file);
13958 return;
13960 case 8:
13961 #ifdef HAVE_AS_IX86_FILDQ
13962 putc ('q', file);
13963 #else
13964 fputs ("ll", file);
13965 #endif
13966 return;
13968 default:
13969 break;
13972 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13974 /* 387 opcodes don't get size suffixes
13975 if the operands are registers. */
13976 if (STACK_REG_P (x))
13977 return;
13979 switch (GET_MODE_SIZE (GET_MODE (x)))
13981 case 4:
13982 putc ('s', file);
13983 return;
13985 case 8:
13986 putc ('l', file);
13987 return;
13989 case 12:
13990 case 16:
13991 putc ('t', file);
13992 return;
13994 default:
13995 break;
13998 else
14000 output_operand_lossage
14001 ("invalid operand type used with operand code '%c'", code);
14002 return;
14005 output_operand_lossage
14006 ("invalid operand size for operand code '%c'", code);
14007 return;
14009 case 'd':
14010 case 'b':
14011 case 'w':
14012 case 'k':
14013 case 'q':
14014 case 'h':
14015 case 't':
14016 case 'y':
14017 case 'x':
14018 case 'X':
14019 case 'P':
14020 break;
14022 case 's':
14023 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14025 ix86_print_operand (file, x, 0);
14026 fputs (", ", file);
14028 return;
14030 case 'D':
14031 /* A little bit of braindamage here. The SSE compare instructions
14032 use completely different names for the comparisons than the
14033 fp conditional moves do. */
14034 if (TARGET_AVX)
14036 switch (GET_CODE (x))
14038 case EQ:
14039 fputs ("eq", file);
14040 break;
14041 case UNEQ:
14042 fputs ("eq_us", file);
14043 break;
14044 case LT:
14045 fputs ("lt", file);
14046 break;
14047 case UNLT:
14048 fputs ("nge", file);
14049 break;
14050 case LE:
14051 fputs ("le", file);
14052 break;
14053 case UNLE:
14054 fputs ("ngt", file);
14055 break;
14056 case UNORDERED:
14057 fputs ("unord", file);
14058 break;
14059 case NE:
14060 fputs ("neq", file);
14061 break;
14062 case LTGT:
14063 fputs ("neq_oq", file);
14064 break;
14065 case GE:
14066 fputs ("ge", file);
14067 break;
14068 case UNGE:
14069 fputs ("nlt", file);
14070 break;
14071 case GT:
14072 fputs ("gt", file);
14073 break;
14074 case UNGT:
14075 fputs ("nle", file);
14076 break;
14077 case ORDERED:
14078 fputs ("ord", file);
14079 break;
14080 default:
14081 output_operand_lossage ("operand is not a condition code, "
14082 "invalid operand code 'D'");
14083 return;
14086 else
14088 switch (GET_CODE (x))
14090 case EQ:
14091 case UNEQ:
14092 fputs ("eq", file);
14093 break;
14094 case LT:
14095 case UNLT:
14096 fputs ("lt", file);
14097 break;
14098 case LE:
14099 case UNLE:
14100 fputs ("le", file);
14101 break;
14102 case UNORDERED:
14103 fputs ("unord", file);
14104 break;
14105 case NE:
14106 case LTGT:
14107 fputs ("neq", file);
14108 break;
14109 case UNGE:
14110 case GE:
14111 fputs ("nlt", file);
14112 break;
14113 case UNGT:
14114 case GT:
14115 fputs ("nle", file);
14116 break;
14117 case ORDERED:
14118 fputs ("ord", file);
14119 break;
14120 default:
14121 output_operand_lossage ("operand is not a condition code, "
14122 "invalid operand code 'D'");
14123 return;
14126 return;
14127 case 'O':
14128 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14129 if (ASSEMBLER_DIALECT == ASM_ATT)
14131 switch (GET_MODE (x))
14133 case HImode: putc ('w', file); break;
14134 case SImode:
14135 case SFmode: putc ('l', file); break;
14136 case DImode:
14137 case DFmode: putc ('q', file); break;
14138 default: gcc_unreachable ();
14140 putc ('.', file);
14142 #endif
14143 return;
14144 case 'C':
14145 if (!COMPARISON_P (x))
14147 output_operand_lossage ("operand is neither a constant nor a "
14148 "condition code, invalid operand code "
14149 "'C'");
14150 return;
14152 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
14153 return;
14154 case 'F':
14155 if (!COMPARISON_P (x))
14157 output_operand_lossage ("operand is neither a constant nor a "
14158 "condition code, invalid operand code "
14159 "'F'");
14160 return;
14162 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14163 if (ASSEMBLER_DIALECT == ASM_ATT)
14164 putc ('.', file);
14165 #endif
14166 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
14167 return;
14169 /* Like above, but reverse condition */
14170 case 'c':
14171 /* Check to see if argument to %c is really a constant
14172 and not a condition code which needs to be reversed. */
14173 if (!COMPARISON_P (x))
14175 output_operand_lossage ("operand is neither a constant nor a "
14176 "condition code, invalid operand "
14177 "code 'c'");
14178 return;
14180 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
14181 return;
14182 case 'f':
14183 if (!COMPARISON_P (x))
14185 output_operand_lossage ("operand is neither a constant nor a "
14186 "condition code, invalid operand "
14187 "code 'f'");
14188 return;
14190 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14191 if (ASSEMBLER_DIALECT == ASM_ATT)
14192 putc ('.', file);
14193 #endif
14194 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
14195 return;
14197 case 'H':
14198 /* It doesn't actually matter what mode we use here, as we're
14199 only going to use this for printing. */
14200 x = adjust_address_nv (x, DImode, 8);
14201 break;
14203 case '+':
14205 rtx x;
14207 if (!optimize
14208 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
14209 return;
14211 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14212 if (x)
14214 int pred_val = INTVAL (XEXP (x, 0));
14216 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14217 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14219 int taken = pred_val > REG_BR_PROB_BASE / 2;
14220 int cputaken = final_forward_branch_p (current_output_insn) == 0;
14222 /* Emit hints only in the case where the default branch prediction
14223 heuristics would fail. */
14224 if (taken != cputaken)
14226 /* We use 3e (DS) prefix for taken branches and
14227 2e (CS) prefix for not taken branches. */
14228 if (taken)
14229 fputs ("ds ; ", file);
14230 else
14231 fputs ("cs ; ", file);
14235 return;
14238 case 'Y':
14239 switch (GET_CODE (x))
14241 case NE:
14242 fputs ("neq", file);
14243 break;
14244 case EQ:
14245 fputs ("eq", file);
14246 break;
14247 case GE:
14248 case GEU:
14249 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14250 break;
14251 case GT:
14252 case GTU:
14253 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14254 break;
14255 case LE:
14256 case LEU:
14257 fputs ("le", file);
14258 break;
14259 case LT:
14260 case LTU:
14261 fputs ("lt", file);
14262 break;
14263 case UNORDERED:
14264 fputs ("unord", file);
14265 break;
14266 case ORDERED:
14267 fputs ("ord", file);
14268 break;
14269 case UNEQ:
14270 fputs ("ueq", file);
14271 break;
14272 case UNGE:
14273 fputs ("nlt", file);
14274 break;
14275 case UNGT:
14276 fputs ("nle", file);
14277 break;
14278 case UNLE:
14279 fputs ("ule", file);
14280 break;
14281 case UNLT:
14282 fputs ("ult", file);
14283 break;
14284 case LTGT:
14285 fputs ("une", file);
14286 break;
14287 default:
14288 output_operand_lossage ("operand is not a condition code, "
14289 "invalid operand code 'Y'");
14290 return;
14292 return;
14294 case ';':
14295 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14296 putc (';', file);
14297 #endif
14298 return;
14300 case '@':
14301 if (ASSEMBLER_DIALECT == ASM_ATT)
14302 putc ('%', file);
14304 /* The kernel uses a different segment register for performance
14305 reasons; a system call would not have to trash the userspace
14306 segment register, which would be expensive. */
14307 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14308 fputs ("fs", file);
14309 else
14310 fputs ("gs", file);
14311 return;
14313 default:
14314 output_operand_lossage ("invalid operand code '%c'", code);
14318 if (REG_P (x))
14319 print_reg (x, code, file);
14321 else if (MEM_P (x))
14323 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14324 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14325 && GET_MODE (x) != BLKmode)
14327 const char * size;
14328 switch (GET_MODE_SIZE (GET_MODE (x)))
14330 case 1: size = "BYTE"; break;
14331 case 2: size = "WORD"; break;
14332 case 4: size = "DWORD"; break;
14333 case 8: size = "QWORD"; break;
14334 case 12: size = "TBYTE"; break;
14335 case 16:
14336 if (GET_MODE (x) == XFmode)
14337 size = "TBYTE";
14338 else
14339 size = "XMMWORD";
14340 break;
14341 case 32: size = "YMMWORD"; break;
14342 default:
14343 gcc_unreachable ();
14346 /* Check for explicit size override (codes 'b', 'w' and 'k') */
14347 if (code == 'b')
14348 size = "BYTE";
14349 else if (code == 'w')
14350 size = "WORD";
14351 else if (code == 'k')
14352 size = "DWORD";
14354 fputs (size, file);
14355 fputs (" PTR ", file);
14358 x = XEXP (x, 0);
14359 /* Avoid (%rip) for call operands. */
14360 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14361 && !CONST_INT_P (x))
14362 output_addr_const (file, x);
14363 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14364 output_operand_lossage ("invalid constraints for operand");
14365 else
14366 output_address (x);
14369 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14371 REAL_VALUE_TYPE r;
14372 long l;
14374 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14375 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14377 if (ASSEMBLER_DIALECT == ASM_ATT)
14378 putc ('$', file);
14379 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14380 if (code == 'q')
14381 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14382 else
14383 fprintf (file, "0x%08x", (unsigned int) l);
14386 /* These float cases don't actually occur as immediate operands. */
14387 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14389 char dstr[30];
14391 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14392 fputs (dstr, file);
14395 else if (GET_CODE (x) == CONST_DOUBLE
14396 && GET_MODE (x) == XFmode)
14398 char dstr[30];
14400 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14401 fputs (dstr, file);
14404 else
14406 /* We have patterns that allow zero sets of memory, for instance.
14407 In 64-bit mode, we should probably support all 8-byte vectors,
14408 since we can in fact encode that into an immediate. */
14409 if (GET_CODE (x) == CONST_VECTOR)
14411 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14412 x = const0_rtx;
14415 if (code != 'P')
14417 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14419 if (ASSEMBLER_DIALECT == ASM_ATT)
14420 putc ('$', file);
14422 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14423 || GET_CODE (x) == LABEL_REF)
14425 if (ASSEMBLER_DIALECT == ASM_ATT)
14426 putc ('$', file);
14427 else
14428 fputs ("OFFSET FLAT:", file);
14431 if (CONST_INT_P (x))
14432 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14433 else if (flag_pic || MACHOPIC_INDIRECT)
14434 output_pic_addr_const (file, x, code);
14435 else
14436 output_addr_const (file, x);
14440 static bool
14441 ix86_print_operand_punct_valid_p (unsigned char code)
14443 return (code == '@' || code == '*' || code == '+'
14444 || code == '&' || code == ';');
14447 /* Print a memory operand whose address is ADDR. */
14449 static void
14450 ix86_print_operand_address (FILE *file, rtx addr)
14452 struct ix86_address parts;
14453 rtx base, index, disp;
14454 int scale;
14455 int ok = ix86_decompose_address (addr, &parts);
14457 gcc_assert (ok);
14459 base = parts.base;
14460 index = parts.index;
14461 disp = parts.disp;
14462 scale = parts.scale;
14464 switch (parts.seg)
14466 case SEG_DEFAULT:
14467 break;
14468 case SEG_FS:
14469 case SEG_GS:
14470 if (ASSEMBLER_DIALECT == ASM_ATT)
14471 putc ('%', file);
14472 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14473 break;
14474 default:
14475 gcc_unreachable ();
14478 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14479 if (TARGET_64BIT && !base && !index)
14481 rtx symbol = disp;
14483 if (GET_CODE (disp) == CONST
14484 && GET_CODE (XEXP (disp, 0)) == PLUS
14485 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14486 symbol = XEXP (XEXP (disp, 0), 0);
14488 if (GET_CODE (symbol) == LABEL_REF
14489 || (GET_CODE (symbol) == SYMBOL_REF
14490 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14491 base = pc_rtx;
14493 if (!base && !index)
14495 /* A displacement-only address requires special attention. */
14497 if (CONST_INT_P (disp))
14499 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14500 fputs ("ds:", file);
14501 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14503 else if (flag_pic)
14504 output_pic_addr_const (file, disp, 0);
14505 else
14506 output_addr_const (file, disp);
14508 else
14510 if (ASSEMBLER_DIALECT == ASM_ATT)
14512 if (disp)
14514 if (flag_pic)
14515 output_pic_addr_const (file, disp, 0);
14516 else if (GET_CODE (disp) == LABEL_REF)
14517 output_asm_label (disp);
14518 else
14519 output_addr_const (file, disp);
14522 putc ('(', file);
14523 if (base)
14524 print_reg (base, 0, file);
14525 if (index)
14527 putc (',', file);
14528 print_reg (index, 0, file);
14529 if (scale != 1)
14530 fprintf (file, ",%d", scale);
14532 putc (')', file);
14534 else
14536 rtx offset = NULL_RTX;
14538 if (disp)
14540 /* Pull out the offset of a symbol; print any symbol itself. */
14541 if (GET_CODE (disp) == CONST
14542 && GET_CODE (XEXP (disp, 0)) == PLUS
14543 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14545 offset = XEXP (XEXP (disp, 0), 1);
14546 disp = gen_rtx_CONST (VOIDmode,
14547 XEXP (XEXP (disp, 0), 0));
14550 if (flag_pic)
14551 output_pic_addr_const (file, disp, 0);
14552 else if (GET_CODE (disp) == LABEL_REF)
14553 output_asm_label (disp);
14554 else if (CONST_INT_P (disp))
14555 offset = disp;
14556 else
14557 output_addr_const (file, disp);
14560 putc ('[', file);
14561 if (base)
14563 print_reg (base, 0, file);
14564 if (offset)
14566 if (INTVAL (offset) >= 0)
14567 putc ('+', file);
14568 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14571 else if (offset)
14572 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14573 else
14574 putc ('0', file);
14576 if (index)
14578 putc ('+', file);
14579 print_reg (index, 0, file);
14580 if (scale != 1)
14581 fprintf (file, "*%d", scale);
14583 putc (']', file);
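/* For example, with base %ebx, index %ecx, scale 4 and displacement 16,
   the AT&T branch above prints "16(%ebx,%ecx,4)" while the Intel branch
   prints "[ebx+16+ecx*4]"; a 64-bit displacement-only symbol is printed
   as "sym(%rip)" via the pc_rtx base set up earlier in the function.  */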
14588 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14590 static bool
14591 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14593 rtx op;
14595 if (GET_CODE (x) != UNSPEC)
14596 return false;
14598 op = XVECEXP (x, 0, 0);
14599 switch (XINT (x, 1))
14601 case UNSPEC_GOTTPOFF:
14602 output_addr_const (file, op);
14603 /* FIXME: This might be @TPOFF in Sun ld. */
14604 fputs ("@gottpoff", file);
14605 break;
14606 case UNSPEC_TPOFF:
14607 output_addr_const (file, op);
14608 fputs ("@tpoff", file);
14609 break;
14610 case UNSPEC_NTPOFF:
14611 output_addr_const (file, op);
14612 if (TARGET_64BIT)
14613 fputs ("@tpoff", file);
14614 else
14615 fputs ("@ntpoff", file);
14616 break;
14617 case UNSPEC_DTPOFF:
14618 output_addr_const (file, op);
14619 fputs ("@dtpoff", file);
14620 break;
14621 case UNSPEC_GOTNTPOFF:
14622 output_addr_const (file, op);
14623 if (TARGET_64BIT)
14624 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14625 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14626 else
14627 fputs ("@gotntpoff", file);
14628 break;
14629 case UNSPEC_INDNTPOFF:
14630 output_addr_const (file, op);
14631 fputs ("@indntpoff", file);
14632 break;
14633 #if TARGET_MACHO
14634 case UNSPEC_MACHOPIC_OFFSET:
14635 output_addr_const (file, op);
14636 putc ('-', file);
14637 machopic_output_function_base_name (file);
14638 break;
14639 #endif
14641 case UNSPEC_STACK_CHECK:
14643 int offset;
14645 gcc_assert (flag_split_stack);
14647 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14648 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14649 #else
14650 gcc_unreachable ();
14651 #endif
14653 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14655 break;
14657 default:
14658 return false;
14661 return true;
14664 /* Split one or more double-mode RTL references into pairs of half-mode
14665 references. The RTL can be REG, offsettable MEM, integer constant, or
14666 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14667 split and "num" is its length. lo_half and hi_half are output arrays
14668 that parallel "operands". */
14670 void
14671 split_double_mode (enum machine_mode mode, rtx operands[],
14672 int num, rtx lo_half[], rtx hi_half[])
14674 enum machine_mode half_mode;
14675 unsigned int byte;
14677 switch (mode)
14679 case TImode:
14680 half_mode = DImode;
14681 break;
14682 case DImode:
14683 half_mode = SImode;
14684 break;
14685 default:
14686 gcc_unreachable ();
14689 byte = GET_MODE_SIZE (half_mode);
14691 while (num--)
14693 rtx op = operands[num];
14695 /* simplify_subreg refuses to split volatile memory addresses,
14696 but we still have to handle them. */
14697 if (MEM_P (op))
14699 lo_half[num] = adjust_address (op, half_mode, 0);
14700 hi_half[num] = adjust_address (op, half_mode, byte);
14702 else
14704 lo_half[num] = simplify_gen_subreg (half_mode, op,
14705 GET_MODE (op) == VOIDmode
14706 ? mode : GET_MODE (op), 0);
14707 hi_half[num] = simplify_gen_subreg (half_mode, op,
14708 GET_MODE (op) == VOIDmode
14709 ? mode : GET_MODE (op), byte);
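/* Usage sketch (variable names here are only illustrative): to split a
   single DImode operand OP into its SImode halves one might write

       rtx ops[1], lo[1], hi[1];
       ops[0] = op;
       split_double_mode (DImode, ops, 1, lo, hi);

   after which lo[0] holds the low 32 bits and hi[0] the high 32 bits,
   either as adjusted MEMs or as subregs, depending on the kind of OP.  */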
14714 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14715 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14716 is the expression of the binary operation. The output may either be
14717 emitted here, or returned to the caller, like all output_* functions.
14719 There is no guarantee that the operands are the same mode, as they
14720 might be within FLOAT or FLOAT_EXTEND expressions. */
14722 #ifndef SYSV386_COMPAT
14723 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14724 wants to fix the assemblers because that causes incompatibility
14725 with gcc. No-one wants to fix gcc because that causes
14726 incompatibility with assemblers... You can use the option of
14727 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14728 #define SYSV386_COMPAT 1
14729 #endif
14731 const char *
14732 output_387_binary_op (rtx insn, rtx *operands)
14734 static char buf[40];
14735 const char *p;
14736 const char *ssep;
14737 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14739 #ifdef ENABLE_CHECKING
14740 /* Even if we do not want to check the inputs, this documents the input
14741 constraints, which helps in understanding the following code. */
14742 if (STACK_REG_P (operands[0])
14743 && ((REG_P (operands[1])
14744 && REGNO (operands[0]) == REGNO (operands[1])
14745 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14746 || (REG_P (operands[2])
14747 && REGNO (operands[0]) == REGNO (operands[2])
14748 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14749 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14750 ; /* ok */
14751 else
14752 gcc_assert (is_sse);
14753 #endif
14755 switch (GET_CODE (operands[3]))
14757 case PLUS:
14758 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14759 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14760 p = "fiadd";
14761 else
14762 p = "fadd";
14763 ssep = "vadd";
14764 break;
14766 case MINUS:
14767 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14768 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14769 p = "fisub";
14770 else
14771 p = "fsub";
14772 ssep = "vsub";
14773 break;
14775 case MULT:
14776 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14777 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14778 p = "fimul";
14779 else
14780 p = "fmul";
14781 ssep = "vmul";
14782 break;
14784 case DIV:
14785 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14786 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14787 p = "fidiv";
14788 else
14789 p = "fdiv";
14790 ssep = "vdiv";
14791 break;
14793 default:
14794 gcc_unreachable ();
14797 if (is_sse)
14799 if (TARGET_AVX)
14801 strcpy (buf, ssep);
14802 if (GET_MODE (operands[0]) == SFmode)
14803 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14804 else
14805 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14807 else
14809 strcpy (buf, ssep + 1);
14810 if (GET_MODE (operands[0]) == SFmode)
14811 strcat (buf, "ss\t{%2, %0|%0, %2}");
14812 else
14813 strcat (buf, "sd\t{%2, %0|%0, %2}");
14815 return buf;
14817 strcpy (buf, p);
14819 switch (GET_CODE (operands[3]))
14821 case MULT:
14822 case PLUS:
14823 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14825 rtx temp = operands[2];
14826 operands[2] = operands[1];
14827 operands[1] = temp;
14830 /* We know operands[0] == operands[1]. */
14832 if (MEM_P (operands[2]))
14834 p = "%Z2\t%2";
14835 break;
14838 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14840 if (STACK_TOP_P (operands[0]))
14841 /* How is it that we are storing to a dead operand[2]?
14842 Well, presumably operands[1] is dead too. We can't
14843 store the result to st(0) as st(0) gets popped on this
14844 instruction. Instead store to operands[2] (which I
14845 think has to be st(1)). st(1) will be popped later.
14846 gcc <= 2.8.1 didn't have this check and generated
14847 assembly code that the Unixware assembler rejected. */
14848 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14849 else
14850 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14851 break;
14854 if (STACK_TOP_P (operands[0]))
14855 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14856 else
14857 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14858 break;
14860 case MINUS:
14861 case DIV:
14862 if (MEM_P (operands[1]))
14864 p = "r%Z1\t%1";
14865 break;
14868 if (MEM_P (operands[2]))
14870 p = "%Z2\t%2";
14871 break;
14874 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14876 #if SYSV386_COMPAT
14877 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14878 derived assemblers, confusingly reverse the direction of
14879 the operation for fsub{r} and fdiv{r} when the
14880 destination register is not st(0). The Intel assembler
14881 doesn't have this brain damage. Read !SYSV386_COMPAT to
14882 figure out what the hardware really does. */
14883 if (STACK_TOP_P (operands[0]))
14884 p = "{p\t%0, %2|rp\t%2, %0}";
14885 else
14886 p = "{rp\t%2, %0|p\t%0, %2}";
14887 #else
14888 if (STACK_TOP_P (operands[0]))
14889 /* As above for fmul/fadd, we can't store to st(0). */
14890 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14891 else
14892 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14893 #endif
14894 break;
14897 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14899 #if SYSV386_COMPAT
14900 if (STACK_TOP_P (operands[0]))
14901 p = "{rp\t%0, %1|p\t%1, %0}";
14902 else
14903 p = "{p\t%1, %0|rp\t%0, %1}";
14904 #else
14905 if (STACK_TOP_P (operands[0]))
14906 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14907 else
14908 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14909 #endif
14910 break;
14913 if (STACK_TOP_P (operands[0]))
14915 if (STACK_TOP_P (operands[1]))
14916 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14917 else
14918 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14919 break;
14921 else if (STACK_TOP_P (operands[1]))
14923 #if SYSV386_COMPAT
14924 p = "{\t%1, %0|r\t%0, %1}";
14925 #else
14926 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14927 #endif
14929 else
14931 #if SYSV386_COMPAT
14932 p = "{r\t%2, %0|\t%0, %2}";
14933 #else
14934 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14935 #endif
14937 break;
14939 default:
14940 gcc_unreachable ();
14943 strcat (buf, p);
14944 return buf;
14947 /* Return needed mode for entity in optimize_mode_switching pass. */
14949 int
14950 ix86_mode_needed (int entity, rtx insn)
14952 enum attr_i387_cw mode;
14954 /* The mode UNINITIALIZED is used to store the control word after a
14955 function call or ASM pattern. The mode ANY specifies that the function
14956 has no requirements on the control word and makes no changes in the
14957 bits we are interested in. */
14959 if (CALL_P (insn)
14960 || (NONJUMP_INSN_P (insn)
14961 && (asm_noperands (PATTERN (insn)) >= 0
14962 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14963 return I387_CW_UNINITIALIZED;
14965 if (recog_memoized (insn) < 0)
14966 return I387_CW_ANY;
14968 mode = get_attr_i387_cw (insn);
14970 switch (entity)
14972 case I387_TRUNC:
14973 if (mode == I387_CW_TRUNC)
14974 return mode;
14975 break;
14977 case I387_FLOOR:
14978 if (mode == I387_CW_FLOOR)
14979 return mode;
14980 break;
14982 case I387_CEIL:
14983 if (mode == I387_CW_CEIL)
14984 return mode;
14985 break;
14987 case I387_MASK_PM:
14988 if (mode == I387_CW_MASK_PM)
14989 return mode;
14990 break;
14992 default:
14993 gcc_unreachable ();
14996 return I387_CW_ANY;
14999 /* Output code to initialize the control word copies used by trunc?f?i and
15000 rounding patterns. MODE selects the rounding or masking variant; the
15001 current control word is saved and a modified copy is stored in the stack slot for MODE. */
15003 void
15004 emit_i387_cw_initialization (int mode)
15006 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15007 rtx new_mode;
15009 enum ix86_stack_slot slot;
15011 rtx reg = gen_reg_rtx (HImode);
15013 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15014 emit_move_insn (reg, copy_rtx (stored_mode));
15016 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
15017 || optimize_function_for_size_p (cfun))
15019 switch (mode)
15021 case I387_CW_TRUNC:
15022 /* round toward zero (truncate) */
15023 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15024 slot = SLOT_CW_TRUNC;
15025 break;
15027 case I387_CW_FLOOR:
15028 /* round down toward -oo */
15029 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15030 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15031 slot = SLOT_CW_FLOOR;
15032 break;
15034 case I387_CW_CEIL:
15035 /* round up toward +oo */
15036 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15037 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15038 slot = SLOT_CW_CEIL;
15039 break;
15041 case I387_CW_MASK_PM:
15042 /* mask precision exception for nearbyint() */
15043 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15044 slot = SLOT_CW_MASK_PM;
15045 break;
15047 default:
15048 gcc_unreachable ();
15051 else
15053 switch (mode)
15055 case I387_CW_TRUNC:
15056 /* round toward zero (truncate) */
15057 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
15058 slot = SLOT_CW_TRUNC;
15059 break;
15061 case I387_CW_FLOOR:
15062 /* round down toward -oo */
15063 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
15064 slot = SLOT_CW_FLOOR;
15065 break;
15067 case I387_CW_CEIL:
15068 /* round up toward +oo */
15069 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
15070 slot = SLOT_CW_CEIL;
15071 break;
15073 case I387_CW_MASK_PM:
15074 /* mask precision exception for nearbyint() */
15075 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15076 slot = SLOT_CW_MASK_PM;
15077 break;
15079 default:
15080 gcc_unreachable ();
15084 gcc_assert (slot < MAX_386_STACK_LOCALS);
15086 new_mode = assign_386_stack_local (HImode, slot);
15087 emit_move_insn (new_mode, reg);
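/* For reference, the constants used above are the x87 control word
   fields: bits 10-11 are the rounding control (0x0400 round down,
   0x0800 round up, 0x0c00 truncate, 0x0000 round to nearest), and
   bit 5 (0x0020) is the precision exception mask needed for
   nearbyint().  The insv path writes the same rounding bits through
   the high byte of the control word.  */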
15090 /* Output code for INSN to convert a float to a signed int. OPERANDS
15091 are the insn operands. The output may be [HSD]Imode and the input
15092 operand may be [SDX]Fmode. */
15094 const char *
15095 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
15097 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15098 int dimode_p = GET_MODE (operands[0]) == DImode;
15099 int round_mode = get_attr_i387_cw (insn);
15101 /* Jump through a hoop or two for DImode, since the hardware has no
15102 non-popping instruction. We used to do this a different way, but
15103 that was somewhat fragile and broke with post-reload splitters. */
15104 if ((dimode_p || fisttp) && !stack_top_dies)
15105 output_asm_insn ("fld\t%y1", operands);
15107 gcc_assert (STACK_TOP_P (operands[1]));
15108 gcc_assert (MEM_P (operands[0]));
15109 gcc_assert (GET_MODE (operands[1]) != TFmode);
15111 if (fisttp)
15112 output_asm_insn ("fisttp%Z0\t%0", operands);
15113 else
15115 if (round_mode != I387_CW_ANY)
15116 output_asm_insn ("fldcw\t%3", operands);
15117 if (stack_top_dies || dimode_p)
15118 output_asm_insn ("fistp%Z0\t%0", operands);
15119 else
15120 output_asm_insn ("fist%Z0\t%0", operands);
15121 if (round_mode != I387_CW_ANY)
15122 output_asm_insn ("fldcw\t%2", operands);
15125 return "";
15128 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15129 have the values zero or one, indicates the ffreep insn's operand
15130 from the OPERANDS array. */
15132 static const char *
15133 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15135 if (TARGET_USE_FFREEP)
15136 #ifdef HAVE_AS_IX86_FFREEP
15137 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15138 #else
15140 static char retval[32];
15141 int regno = REGNO (operands[opno]);
15143 gcc_assert (FP_REGNO_P (regno));
15145 regno -= FIRST_STACK_REG;
15147 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
15148 return retval;
15150 #endif
15152 return opno ? "fstp\t%y1" : "fstp\t%y0";
15156 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15157 should be used. UNORDERED_P is true when fucom should be used. */
15159 const char *
15160 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
15162 int stack_top_dies;
15163 rtx cmp_op0, cmp_op1;
15164 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
15166 if (eflags_p)
15168 cmp_op0 = operands[0];
15169 cmp_op1 = operands[1];
15171 else
15173 cmp_op0 = operands[1];
15174 cmp_op1 = operands[2];
15177 if (is_sse)
15179 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
15180 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
15181 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
15182 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
15184 if (GET_MODE (operands[0]) == SFmode)
15185 if (unordered_p)
15186 return &ucomiss[TARGET_AVX ? 0 : 1];
15187 else
15188 return &comiss[TARGET_AVX ? 0 : 1];
15189 else
15190 if (unordered_p)
15191 return &ucomisd[TARGET_AVX ? 0 : 1];
15192 else
15193 return &comisd[TARGET_AVX ? 0 : 1];
15196 gcc_assert (STACK_TOP_P (cmp_op0));
15198 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15200 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
15202 if (stack_top_dies)
15204 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15205 return output_387_ffreep (operands, 1);
15207 else
15208 return "ftst\n\tfnstsw\t%0";
15211 if (STACK_REG_P (cmp_op1)
15212 && stack_top_dies
15213 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15214 && REGNO (cmp_op1) != FIRST_STACK_REG)
15216 /* If both the top of the 387 stack and the other operand (which is
15217 also a stack register) die, then this must be a
15218 `fcompp' float compare. */
15220 if (eflags_p)
15222 /* There is no double popping fcomi variant. Fortunately,
15223 eflags is immune from the fstp's cc clobbering. */
15224 if (unordered_p)
15225 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15226 else
15227 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15228 return output_387_ffreep (operands, 0);
15230 else
15232 if (unordered_p)
15233 return "fucompp\n\tfnstsw\t%0";
15234 else
15235 return "fcompp\n\tfnstsw\t%0";
15238 else
15240 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15242 static const char * const alt[16] =
15244 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15245 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15246 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15247 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15249 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15250 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15251 NULL,
15252 NULL,
15254 "fcomi\t{%y1, %0|%0, %y1}",
15255 "fcomip\t{%y1, %0|%0, %y1}",
15256 "fucomi\t{%y1, %0|%0, %y1}",
15257 "fucomip\t{%y1, %0|%0, %y1}",
15259 NULL,
15260 NULL,
15261 NULL,
15262 NULL
15265 int mask;
15266 const char *ret;
15268 mask = eflags_p << 3;
15269 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15270 mask |= unordered_p << 1;
15271 mask |= stack_top_dies;
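/* MASK now indexes ALT: bit 3 selects the fcomi forms, bit 2 the
   integer-operand (ficom) forms, bit 1 the unordered (fucom) forms,
   and bit 0 the popping forms. */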
15273 gcc_assert (mask < 16);
15274 ret = alt[mask];
15275 gcc_assert (ret);
15277 return ret;
15281 void
15282 ix86_output_addr_vec_elt (FILE *file, int value)
15284 const char *directive = ASM_LONG;
15286 #ifdef ASM_QUAD
15287 if (TARGET_64BIT)
15288 directive = ASM_QUAD;
15289 #else
15290 gcc_assert (!TARGET_64BIT);
15291 #endif
15293 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15296 void
15297 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15299 const char *directive = ASM_LONG;
15301 #ifdef ASM_QUAD
15302 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15303 directive = ASM_QUAD;
15304 #else
15305 gcc_assert (!TARGET_64BIT);
15306 #endif
15307 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15308 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15309 fprintf (file, "%s%s%d-%s%d\n",
15310 directive, LPREFIX, value, LPREFIX, rel);
15311 else if (HAVE_AS_GOTOFF_IN_DATA)
15312 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15313 #if TARGET_MACHO
15314 else if (TARGET_MACHO)
15316 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15317 machopic_output_function_base_name (file);
15318 putc ('\n', file);
15320 #endif
15321 else
15322 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15323 GOT_SYMBOL_NAME, LPREFIX, value);
15326 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15327 for the target. */
15329 void
15330 ix86_expand_clear (rtx dest)
15332 rtx tmp;
15334 /* We play register width games, which are only valid after reload. */
15335 gcc_assert (reload_completed);
15337 /* Avoid HImode and its attendant prefix byte. */
15338 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15339 dest = gen_rtx_REG (SImode, REGNO (dest));
15340 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15342 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15343 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15345 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15346 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
15349 emit_insn (tmp);
15352 /* X is an unchanging MEM. If it is a constant pool reference, return
15353 the constant pool rtx, else NULL. */
15356 maybe_get_pool_constant (rtx x)
15358 x = ix86_delegitimize_address (XEXP (x, 0));
15360 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15361 return get_pool_constant (x);
15363 return NULL_RTX;
15366 void
15367 ix86_expand_move (enum machine_mode mode, rtx operands[])
15369 rtx op0, op1;
15370 enum tls_model model;
15372 op0 = operands[0];
15373 op1 = operands[1];
15375 if (GET_CODE (op1) == SYMBOL_REF)
15377 model = SYMBOL_REF_TLS_MODEL (op1);
15378 if (model)
15380 op1 = legitimize_tls_address (op1, model, true);
15381 op1 = force_operand (op1, op0);
15382 if (op1 == op0)
15383 return;
15385 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15386 && SYMBOL_REF_DLLIMPORT_P (op1))
15387 op1 = legitimize_dllimport_symbol (op1, false);
15389 else if (GET_CODE (op1) == CONST
15390 && GET_CODE (XEXP (op1, 0)) == PLUS
15391 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15393 rtx addend = XEXP (XEXP (op1, 0), 1);
15394 rtx symbol = XEXP (XEXP (op1, 0), 0);
15395 rtx tmp = NULL;
15397 model = SYMBOL_REF_TLS_MODEL (symbol);
15398 if (model)
15399 tmp = legitimize_tls_address (symbol, model, true);
15400 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15401 && SYMBOL_REF_DLLIMPORT_P (symbol))
15402 tmp = legitimize_dllimport_symbol (symbol, true);
15404 if (tmp)
15406 tmp = force_operand (tmp, NULL);
15407 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15408 op0, 1, OPTAB_DIRECT);
15409 if (tmp == op0)
15410 return;
15414 if ((flag_pic || MACHOPIC_INDIRECT)
15415 && mode == Pmode && symbolic_operand (op1, Pmode))
15417 if (TARGET_MACHO && !TARGET_64BIT)
15419 #if TARGET_MACHO
15420 /* dynamic-no-pic */
15421 if (MACHOPIC_INDIRECT)
15423 rtx temp = ((reload_in_progress
15424 || ((op0 && REG_P (op0))
15425 && mode == Pmode))
15426 ? op0 : gen_reg_rtx (Pmode));
15427 op1 = machopic_indirect_data_reference (op1, temp);
15428 if (MACHOPIC_PURE)
15429 op1 = machopic_legitimize_pic_address (op1, mode,
15430 temp == op1 ? 0 : temp);
15432 if (op0 != op1 && GET_CODE (op0) != MEM)
15434 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15435 emit_insn (insn);
15436 return;
15438 if (GET_CODE (op0) == MEM)
15439 op1 = force_reg (Pmode, op1);
15440 else
15442 rtx temp = op0;
15443 if (GET_CODE (temp) != REG)
15444 temp = gen_reg_rtx (Pmode);
15445 temp = legitimize_pic_address (op1, temp);
15446 if (temp == op0)
15447 return;
15448 op1 = temp;
15450 /* dynamic-no-pic */
15451 #endif
15453 else
15455 if (MEM_P (op0))
15456 op1 = force_reg (Pmode, op1);
15457 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
15459 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15460 op1 = legitimize_pic_address (op1, reg);
15461 if (op0 == op1)
15462 return;
15466 else
15468 if (MEM_P (op0)
15469 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15470 || !push_operand (op0, mode))
15471 && MEM_P (op1))
15472 op1 = force_reg (mode, op1);
15474 if (push_operand (op0, mode)
15475 && ! general_no_elim_operand (op1, mode))
15476 op1 = copy_to_mode_reg (mode, op1);
15478 /* Force large constants in 64bit compilation into register
15479 to get them CSEed. */
15480 if (can_create_pseudo_p ()
15481 && (mode == DImode) && TARGET_64BIT
15482 && immediate_operand (op1, mode)
15483 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15484 && !register_operand (op0, mode)
15485 && optimize)
15486 op1 = copy_to_mode_reg (mode, op1);
15488 if (can_create_pseudo_p ()
15489 && FLOAT_MODE_P (mode)
15490 && GET_CODE (op1) == CONST_DOUBLE)
15492 /* If we are loading a floating point constant to a register,
15493 force the value to memory now, since we'll get better code
15494 out the back end. */
15496 op1 = validize_mem (force_const_mem (mode, op1));
15497 if (!register_operand (op0, mode))
15499 rtx temp = gen_reg_rtx (mode);
15500 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15501 emit_move_insn (op0, temp);
15502 return;
15507 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15510 void
15511 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15513 rtx op0 = operands[0], op1 = operands[1];
15514 unsigned int align = GET_MODE_ALIGNMENT (mode);
15516 /* Force constants other than zero into memory. We do not know how
15517 the instructions used to build constants modify the upper 64 bits
15518 of the register; once we have that information we may be able
15519 to handle some of them more efficiently. */
15520 if (can_create_pseudo_p ()
15521 && register_operand (op0, mode)
15522 && (CONSTANT_P (op1)
15523 || (GET_CODE (op1) == SUBREG
15524 && CONSTANT_P (SUBREG_REG (op1))))
15525 && !standard_sse_constant_p (op1))
15526 op1 = validize_mem (force_const_mem (mode, op1));
15528 /* We need to check memory alignment for SSE mode since attributes
15529 can make operands unaligned. */
15530 if (can_create_pseudo_p ()
15531 && SSE_REG_MODE_P (mode)
15532 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15533 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15535 rtx tmp[2];
15537 /* ix86_expand_vector_move_misalign() does not like constants ... */
15538 if (CONSTANT_P (op1)
15539 || (GET_CODE (op1) == SUBREG
15540 && CONSTANT_P (SUBREG_REG (op1))))
15541 op1 = validize_mem (force_const_mem (mode, op1));
15543 /* ... nor both arguments in memory. */
15544 if (!register_operand (op0, mode)
15545 && !register_operand (op1, mode))
15546 op1 = force_reg (mode, op1);
15548 tmp[0] = op0; tmp[1] = op1;
15549 ix86_expand_vector_move_misalign (mode, tmp);
15550 return;
15553 /* Make operand1 a register if it isn't already. */
15554 if (can_create_pseudo_p ()
15555 && !register_operand (op0, mode)
15556 && !register_operand (op1, mode))
15558 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15559 return;
15562 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15565 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15566 straight to ix86_expand_vector_move. */
15567 /* Code generation for scalar reg-reg moves of single and double precision data:
15568 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15569 movaps reg, reg
15570 else
15571 movss reg, reg
15572 if (x86_sse_partial_reg_dependency == true)
15573 movapd reg, reg
15574 else
15575 movsd reg, reg
15577 Code generation for scalar loads of double precision data:
15578 if (x86_sse_split_regs == true)
15579 movlpd mem, reg (gas syntax)
15580 else
15581 movsd mem, reg
15583 Code generation for unaligned packed loads of single precision data
15584 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15585 if (x86_sse_unaligned_move_optimal)
15586 movups mem, reg
15588 if (x86_sse_partial_reg_dependency == true)
15590 xorps reg, reg
15591 movlps mem, reg
15592 movhps mem+8, reg
15594 else
15596 movlps mem, reg
15597 movhps mem+8, reg
15600 Code generation for unaligned packed loads of double precision data
15601 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15602 if (x86_sse_unaligned_move_optimal)
15603 movupd mem, reg
15605 if (x86_sse_split_regs == true)
15607 movlpd mem, reg
15608 movhpd mem+8, reg
15610 else
15612 movsd mem, reg
15613 movhpd mem+8, reg
15617 void
15618 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15620 rtx op0, op1, m;
15622 op0 = operands[0];
15623 op1 = operands[1];
15625 if (TARGET_AVX)
15627 switch (GET_MODE_CLASS (mode))
15629 case MODE_VECTOR_INT:
15630 case MODE_INT:
15631 switch (GET_MODE_SIZE (mode))
15633 case 16:
15634 /* If we're optimizing for size, movups is the smallest. */
15635 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15637 op0 = gen_lowpart (V4SFmode, op0);
15638 op1 = gen_lowpart (V4SFmode, op1);
15639 emit_insn (gen_avx_movups (op0, op1));
15640 return;
15642 op0 = gen_lowpart (V16QImode, op0);
15643 op1 = gen_lowpart (V16QImode, op1);
15644 emit_insn (gen_avx_movdqu (op0, op1));
15645 break;
15646 case 32:
15647 op0 = gen_lowpart (V32QImode, op0);
15648 op1 = gen_lowpart (V32QImode, op1);
15649 emit_insn (gen_avx_movdqu256 (op0, op1));
15650 break;
15651 default:
15652 gcc_unreachable ();
15654 break;
15655 case MODE_VECTOR_FLOAT:
15656 op0 = gen_lowpart (mode, op0);
15657 op1 = gen_lowpart (mode, op1);
15659 switch (mode)
15661 case V4SFmode:
15662 emit_insn (gen_avx_movups (op0, op1));
15663 break;
15664 case V8SFmode:
15665 emit_insn (gen_avx_movups256 (op0, op1));
15666 break;
15667 case V2DFmode:
15668 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15670 op0 = gen_lowpart (V4SFmode, op0);
15671 op1 = gen_lowpart (V4SFmode, op1);
15672 emit_insn (gen_avx_movups (op0, op1));
15673 return;
15675 emit_insn (gen_avx_movupd (op0, op1));
15676 break;
15677 case V4DFmode:
15678 emit_insn (gen_avx_movupd256 (op0, op1));
15679 break;
15680 default:
15681 gcc_unreachable ();
15683 break;
15685 default:
15686 gcc_unreachable ();
15689 return;
15692 if (MEM_P (op1))
15694 /* If we're optimizing for size, movups is the smallest. */
15695 if (optimize_insn_for_size_p ()
15696 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15698 op0 = gen_lowpart (V4SFmode, op0);
15699 op1 = gen_lowpart (V4SFmode, op1);
15700 emit_insn (gen_sse_movups (op0, op1));
15701 return;
15704 /* ??? If we have typed data, then it would appear that using
15705 movdqu is the only way to get unaligned data loaded with
15706 integer type. */
15707 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15709 op0 = gen_lowpart (V16QImode, op0);
15710 op1 = gen_lowpart (V16QImode, op1);
15711 emit_insn (gen_sse2_movdqu (op0, op1));
15712 return;
15715 if (TARGET_SSE2 && mode == V2DFmode)
15717 rtx zero;
15719 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15721 op0 = gen_lowpart (V2DFmode, op0);
15722 op1 = gen_lowpart (V2DFmode, op1);
15723 emit_insn (gen_sse2_movupd (op0, op1));
15724 return;
15727 /* When SSE registers are split into halves, we can avoid
15728 writing to the top half twice. */
15729 if (TARGET_SSE_SPLIT_REGS)
15731 emit_clobber (op0);
15732 zero = op0;
15734 else
15736 /* ??? Not sure about the best option for the Intel chips.
15737 The following would seem to satisfy; the register is
15738 entirely cleared, breaking the dependency chain. We
15739 then store to the upper half, with a dependency depth
15740 of one. A rumor has it that Intel recommends two movsd
15741 followed by an unpacklpd, but this is unconfirmed. And
15742 given that the dependency depth of the unpacklpd would
15743 still be one, I'm not sure why this would be better. */
15744 zero = CONST0_RTX (V2DFmode);
15747 m = adjust_address (op1, DFmode, 0);
15748 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15749 m = adjust_address (op1, DFmode, 8);
15750 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15752 else
15754 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15756 op0 = gen_lowpart (V4SFmode, op0);
15757 op1 = gen_lowpart (V4SFmode, op1);
15758 emit_insn (gen_sse_movups (op0, op1));
15759 return;
15762 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15763 emit_move_insn (op0, CONST0_RTX (mode));
15764 else
15765 emit_clobber (op0);
15767 if (mode != V4SFmode)
15768 op0 = gen_lowpart (V4SFmode, op0);
15769 m = adjust_address (op1, V2SFmode, 0);
15770 emit_insn (gen_sse_loadlps (op0, op0, m));
15771 m = adjust_address (op1, V2SFmode, 8);
15772 emit_insn (gen_sse_loadhps (op0, op0, m));
15775 else if (MEM_P (op0))
15777 /* If we're optimizing for size, movups is the smallest. */
15778 if (optimize_insn_for_size_p ()
15779 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15781 op0 = gen_lowpart (V4SFmode, op0);
15782 op1 = gen_lowpart (V4SFmode, op1);
15783 emit_insn (gen_sse_movups (op0, op1));
15784 return;
15787 /* ??? Similar to above, only less clear because of quote
15788 typeless stores unquote. */
15789 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15790 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15792 op0 = gen_lowpart (V16QImode, op0);
15793 op1 = gen_lowpart (V16QImode, op1);
15794 emit_insn (gen_sse2_movdqu (op0, op1));
15795 return;
15798 if (TARGET_SSE2 && mode == V2DFmode)
15800 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15802 op0 = gen_lowpart (V2DFmode, op0);
15803 op1 = gen_lowpart (V2DFmode, op1);
15804 emit_insn (gen_sse2_movupd (op0, op1));
15806 else
15808 m = adjust_address (op0, DFmode, 0);
15809 emit_insn (gen_sse2_storelpd (m, op1));
15810 m = adjust_address (op0, DFmode, 8);
15811 emit_insn (gen_sse2_storehpd (m, op1));
15814 else
15816 if (mode != V4SFmode)
15817 op1 = gen_lowpart (V4SFmode, op1);
15819 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15821 op0 = gen_lowpart (V4SFmode, op0);
15822 emit_insn (gen_sse_movups (op0, op1));
15824 else
15826 m = adjust_address (op0, V2SFmode, 0);
15827 emit_insn (gen_sse_storelps (m, op1));
15828 m = adjust_address (op0, V2SFmode, 8);
15829 emit_insn (gen_sse_storehps (m, op1));
15833 else
15834 gcc_unreachable ();
15837 /* Expand a push in MODE. This is some mode for which we do not support
15838 proper push instructions, at least from the registers that we expect
15839 the value to live in. */
15841 void
15842 ix86_expand_push (enum machine_mode mode, rtx x)
15844 rtx tmp;
15846 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15847 GEN_INT (-GET_MODE_SIZE (mode)),
15848 stack_pointer_rtx, 1, OPTAB_DIRECT);
15849 if (tmp != stack_pointer_rtx)
15850 emit_move_insn (stack_pointer_rtx, tmp);
15852 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
15854 /* When we push an operand onto the stack, it has to be aligned at least
15855 at the function argument boundary. However, since we don't have
15856 the argument type, we can't determine the actual argument
15857 boundary. */
15858 emit_move_insn (tmp, x);
15861 /* Helper function of ix86_fixup_binary_operands to canonicalize
15862 operand order. Returns true if the operands should be swapped. */
15864 static bool
15865 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15866 rtx operands[])
15868 rtx dst = operands[0];
15869 rtx src1 = operands[1];
15870 rtx src2 = operands[2];
15872 /* If the operation is not commutative, we can't do anything. */
15873 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15874 return false;
15876 /* Highest priority is that src1 should match dst. */
15877 if (rtx_equal_p (dst, src1))
15878 return false;
15879 if (rtx_equal_p (dst, src2))
15880 return true;
15882 /* Next highest priority is that immediate constants come second. */
15883 if (immediate_operand (src2, mode))
15884 return false;
15885 if (immediate_operand (src1, mode))
15886 return true;
15888 /* Lowest priority is that memory references should come second. */
15889 if (MEM_P (src2))
15890 return false;
15891 if (MEM_P (src1))
15892 return true;
15894 return false;
15898 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15899 destination to use for the operation. If different from the true
15900 destination in operands[0], a copy operation will be required. */
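/* Roughly: a fully general (set (mem:SI A) (plus:SI (mem:SI B) (mem:SI C)))
   cannot be matched directly, so the sources are loaded into registers as
   needed, the operation targets a register, and the caller copies the
   result back to A when the returned destination differs from operands[0]. */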
15903 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15904 rtx operands[])
15906 rtx dst = operands[0];
15907 rtx src1 = operands[1];
15908 rtx src2 = operands[2];
15910 /* Canonicalize operand order. */
15911 if (ix86_swap_binary_operands_p (code, mode, operands))
15913 rtx temp;
15915 /* It is invalid to swap operands of different modes. */
15916 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15918 temp = src1;
15919 src1 = src2;
15920 src2 = temp;
15923 /* Both source operands cannot be in memory. */
15924 if (MEM_P (src1) && MEM_P (src2))
15926 /* Optimization: Only read from memory once. */
15927 if (rtx_equal_p (src1, src2))
15929 src2 = force_reg (mode, src2);
15930 src1 = src2;
15932 else
15933 src2 = force_reg (mode, src2);
15936 /* If the destination is memory, and we do not have matching source
15937 operands, do things in registers. */
15938 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15939 dst = gen_reg_rtx (mode);
15941 /* Source 1 cannot be a constant. */
15942 if (CONSTANT_P (src1))
15943 src1 = force_reg (mode, src1);
15945 /* Source 1 cannot be a non-matching memory. */
15946 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15947 src1 = force_reg (mode, src1);
15949 operands[1] = src1;
15950 operands[2] = src2;
15951 return dst;
15954 /* Similarly, but assume that the destination has already been
15955 set up properly. */
15957 void
15958 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15959 enum machine_mode mode, rtx operands[])
15961 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15962 gcc_assert (dst == operands[0]);
15965 /* Attempt to expand a binary operator. Make the expansion closer to the
15966 actual machine, than just general_operand, which will allow 3 separate
15967 memory references (one output, two input) in a single insn. */
15969 void
15970 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15971 rtx operands[])
15973 rtx src1, src2, dst, op, clob;
15975 dst = ix86_fixup_binary_operands (code, mode, operands);
15976 src1 = operands[1];
15977 src2 = operands[2];
15979 /* Emit the instruction. */
15981 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
15982 if (reload_in_progress)
15984 /* Reload doesn't know about the flags register, and doesn't know that
15985 it doesn't want to clobber it. We can only do this with PLUS. */
15986 gcc_assert (code == PLUS);
15987 emit_insn (op);
15989 else if (reload_completed
15990 && code == PLUS
15991 && !rtx_equal_p (dst, src1))
15993 /* This is going to be an LEA; avoid splitting it later. */
15994 emit_insn (op);
15996 else
15998 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15999 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16002 /* Fix up the destination if needed. */
16003 if (dst != operands[0])
16004 emit_move_insn (operands[0], dst);
16007 /* Return TRUE or FALSE depending on whether the binary operator meets the
16008 appropriate constraints. */
16010 bool
16011 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
16012 rtx operands[3])
16014 rtx dst = operands[0];
16015 rtx src1 = operands[1];
16016 rtx src2 = operands[2];
16018 /* Both source operands cannot be in memory. */
16019 if (MEM_P (src1) && MEM_P (src2))
16020 return false;
16022 /* Canonicalize operand order for commutative operators. */
16023 if (ix86_swap_binary_operands_p (code, mode, operands))
16025 rtx temp = src1;
16026 src1 = src2;
16027 src2 = temp;
16030 /* If the destination is memory, we must have a matching source operand. */
16031 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16032 return false;
16034 /* Source 1 cannot be a constant. */
16035 if (CONSTANT_P (src1))
16036 return false;
16038 /* Source 1 cannot be a non-matching memory. */
16039 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16041 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16042 return (code == AND
16043 && (mode == HImode
16044 || mode == SImode
16045 || (TARGET_64BIT && mode == DImode))
16046 && CONST_INT_P (src2)
16047 && (INTVAL (src2) == 0xff
16048 || INTVAL (src2) == 0xffff));
16051 return true;
16054 /* Attempt to expand a unary operator. Make the expansion closer to the
16055 actual machine, than just general_operand, which will allow 2 separate
16056 memory references (one output, one input) in a single insn. */
16058 void
16059 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
16060 rtx operands[])
16062 int matching_memory;
16063 rtx src, dst, op, clob;
16065 dst = operands[0];
16066 src = operands[1];
16068 /* If the destination is memory, and we do not have matching source
16069 operands, do things in registers. */
16070 matching_memory = 0;
16071 if (MEM_P (dst))
16073 if (rtx_equal_p (dst, src))
16074 matching_memory = 1;
16075 else
16076 dst = gen_reg_rtx (mode);
16079 /* When source operand is memory, destination must match. */
16080 if (MEM_P (src) && !matching_memory)
16081 src = force_reg (mode, src);
16083 /* Emit the instruction. */
16085 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
16086 if (reload_in_progress || code == NOT)
16088 /* Reload doesn't know about the flags register, and doesn't know that
16089 it doesn't want to clobber it. */
16090 gcc_assert (code == NOT);
16091 emit_insn (op);
16093 else
16095 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16096 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16099 /* Fix up the destination if needed. */
16100 if (dst != operands[0])
16101 emit_move_insn (operands[0], dst);
16104 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16105 divisor are within the range [0-255]. */
16107 void
16108 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
16109 bool signed_p)
16111 rtx end_label, qimode_label;
16112 rtx insn, div, mod;
16113 rtx scratch, tmp0, tmp1, tmp2;
16114 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
16115 rtx (*gen_zero_extend) (rtx, rtx);
16116 rtx (*gen_test_ccno_1) (rtx, rtx);
16118 switch (mode)
16120 case SImode:
16121 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
16122 gen_test_ccno_1 = gen_testsi_ccno_1;
16123 gen_zero_extend = gen_zero_extendqisi2;
16124 break;
16125 case DImode:
16126 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
16127 gen_test_ccno_1 = gen_testdi_ccno_1;
16128 gen_zero_extend = gen_zero_extendqidi2;
16129 break;
16130 default:
16131 gcc_unreachable ();
16134 end_label = gen_label_rtx ();
16135 qimode_label = gen_label_rtx ();
16137 scratch = gen_reg_rtx (mode);
16139 /* Use 8bit unsigned divmod if dividend and divisor are within
16140 the range [0-255]. */
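/* The emitted sequence is roughly (illustrative labels and mnemonics):
	or	divisor, scratch	; scratch = dividend | divisor
	test	$-0x100, scratch	; any bits set above bit 7?
	je	.Lqimode
	<full-width signed/unsigned divmod>
	jmp	.Lend
   .Lqimode:
	<8-bit unsigned divide>		; AL = quotient, AH = remainder
   .Lend:  */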
16141 emit_move_insn (scratch, operands[2]);
16142 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
16143 scratch, 1, OPTAB_DIRECT);
16144 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
16145 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
16146 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
16147 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
16148 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
16149 pc_rtx);
16150 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
16151 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16152 JUMP_LABEL (insn) = qimode_label;
16154 /* Generate the original signed/unsigned divmod. */
16155 div = gen_divmod4_1 (operands[0], operands[1],
16156 operands[2], operands[3]);
16157 emit_insn (div);
16159 /* Branch to the end. */
16160 emit_jump_insn (gen_jump (end_label));
16161 emit_barrier ();
16163 /* Generate 8bit unsigned divide. */
16164 emit_label (qimode_label);
16165 /* Don't use operands[0] for the result of the 8bit divide since not all
16166 registers support QImode ZERO_EXTRACT. */
16167 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
16168 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
16169 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
16170 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
16172 if (signed_p)
16174 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
16175 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
16177 else
16179 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
16180 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
16183 /* Extract remainder from AH. */
16184 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
16185 if (REG_P (operands[1]))
16186 insn = emit_move_insn (operands[1], tmp1);
16187 else
16189 /* Need a new scratch register since the old one holds the result
16190 of the 8bit divide. */
16191 scratch = gen_reg_rtx (mode);
16192 emit_move_insn (scratch, tmp1);
16193 insn = emit_move_insn (operands[1], scratch);
16195 set_unique_reg_note (insn, REG_EQUAL, mod);
16197 /* Zero extend quotient from AL. */
16198 tmp1 = gen_lowpart (QImode, tmp0);
16199 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
16200 set_unique_reg_note (insn, REG_EQUAL, div);
16202 emit_label (end_label);
16205 #define LEA_SEARCH_THRESHOLD 12
16207 /* Search backward for non-agu definition of register number REGNO1
16208 or register number REGNO2 in INSN's basic block until
16209 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16210 2. Reach BB boundary, or
16211 3. Reach agu definition.
16212 Returns the distance between the non-agu definition point and INSN.
16213 If no definition point, returns -1. */
16215 static int
16216 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16217 rtx insn)
16219 basic_block bb = BLOCK_FOR_INSN (insn);
16220 int distance = 0;
16221 df_ref *def_rec;
16222 enum attr_type insn_type;
16224 if (insn != BB_HEAD (bb))
16226 rtx prev = PREV_INSN (insn);
16227 while (prev && distance < LEA_SEARCH_THRESHOLD)
16229 if (NONDEBUG_INSN_P (prev))
16231 distance++;
16232 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16233 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16234 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16235 && (regno1 == DF_REF_REGNO (*def_rec)
16236 || regno2 == DF_REF_REGNO (*def_rec)))
16238 insn_type = get_attr_type (prev);
16239 if (insn_type != TYPE_LEA)
16240 goto done;
16243 if (prev == BB_HEAD (bb))
16244 break;
16245 prev = PREV_INSN (prev);
16249 if (distance < LEA_SEARCH_THRESHOLD)
16251 edge e;
16252 edge_iterator ei;
16253 bool simple_loop = false;
16255 FOR_EACH_EDGE (e, ei, bb->preds)
16256 if (e->src == bb)
16258 simple_loop = true;
16259 break;
16262 if (simple_loop)
16264 rtx prev = BB_END (bb);
16265 while (prev
16266 && prev != insn
16267 && distance < LEA_SEARCH_THRESHOLD)
16269 if (NONDEBUG_INSN_P (prev))
16271 distance++;
16272 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16273 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16274 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16275 && (regno1 == DF_REF_REGNO (*def_rec)
16276 || regno2 == DF_REF_REGNO (*def_rec)))
16278 insn_type = get_attr_type (prev);
16279 if (insn_type != TYPE_LEA)
16280 goto done;
16283 prev = PREV_INSN (prev);
16288 distance = -1;
16290 done:
16291 /* get_attr_type may modify recog data. We want to make sure
16292 that recog data is valid for instruction INSN, on which
16293 distance_non_agu_define is called. INSN is unchanged here. */
16294 extract_insn_cached (insn);
16295 return distance;
16298 /* Return the distance between INSN and the next insn that uses
16299 register number REGNO0 in a memory address. Return -1 if no such
16300 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
16302 static int
16303 distance_agu_use (unsigned int regno0, rtx insn)
16305 basic_block bb = BLOCK_FOR_INSN (insn);
16306 int distance = 0;
16307 df_ref *def_rec;
16308 df_ref *use_rec;
16310 if (insn != BB_END (bb))
16312 rtx next = NEXT_INSN (insn);
16313 while (next && distance < LEA_SEARCH_THRESHOLD)
16315 if (NONDEBUG_INSN_P (next))
16317 distance++;
16319 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16320 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16321 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16322 && regno0 == DF_REF_REGNO (*use_rec))
16324 /* Return DISTANCE if OP0 is used in memory
16325 address in NEXT. */
16326 return distance;
16329 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16330 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16331 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16332 && regno0 == DF_REF_REGNO (*def_rec))
16334 /* Return -1 if OP0 is set in NEXT. */
16335 return -1;
16338 if (next == BB_END (bb))
16339 break;
16340 next = NEXT_INSN (next);
16344 if (distance < LEA_SEARCH_THRESHOLD)
16346 edge e;
16347 edge_iterator ei;
16348 bool simple_loop = false;
16350 FOR_EACH_EDGE (e, ei, bb->succs)
16351 if (e->dest == bb)
16353 simple_loop = true;
16354 break;
16357 if (simple_loop)
16359 rtx next = BB_HEAD (bb);
16360 while (next
16361 && next != insn
16362 && distance < LEA_SEARCH_THRESHOLD)
16364 if (NONDEBUG_INSN_P (next))
16366 distance++;
16368 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16369 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16370 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16371 && regno0 == DF_REF_REGNO (*use_rec))
16373 /* Return DISTANCE if OP0 is used in memory
16374 address in NEXT. */
16375 return distance;
16378 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16379 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16380 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16381 && regno0 == DF_REF_REGNO (*def_rec))
16383 /* Return -1 if OP0 is set in NEXT. */
16384 return -1;
16388 next = NEXT_INSN (next);
16393 return -1;
16396 /* Define this macro to tune LEA priority vs ADD; it takes effect when
16397 there is a dilemma of choosing LEA or ADD.
16398 Negative value: ADD is preferred over LEA.
16399 Zero: Neutral.
16400 Positive value: LEA is preferred over ADD. */
16401 #define IX86_LEA_PRIORITY 2
16403 /* Return true if it is OK to optimize an ADD operation to an LEA
16404 operation to avoid flag register consumption. For most processors,
16405 ADD is faster than LEA. For processors like ATOM, if the
16406 destination register of the LEA holds an actual address which will be
16407 used soon, LEA is better; otherwise ADD is better. */
16409 bool
16410 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16412 unsigned int regno0 = true_regnum (operands[0]);
16413 unsigned int regno1 = true_regnum (operands[1]);
16414 unsigned int regno2 = true_regnum (operands[2]);
16416 /* If a = b + c, (a!=b && a!=c), must use lea form. */
16417 if (regno0 != regno1 && regno0 != regno2)
16418 return true;
16420 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16421 return false;
16422 else
16424 int dist_define, dist_use;
16426 /* Return false if REGNO0 isn't used in memory address. */
16427 dist_use = distance_agu_use (regno0, insn);
16428 if (dist_use <= 0)
16429 return false;
16431 dist_define = distance_non_agu_define (regno1, regno2, insn);
16432 if (dist_define <= 0)
16433 return true;
16435 /* If this insn has both a backward non-agu dependence and a forward
16436 agu dependence, the one with the shorter distance takes effect. */
16437 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
16438 return false;
16440 return true;
16444 /* Return true if destination reg of SET_BODY is shift count of
16445 USE_BODY. */
16447 static bool
16448 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16450 rtx set_dest;
16451 rtx shift_rtx;
16452 int i;
16454 /* Retrieve destination of SET_BODY. */
16455 switch (GET_CODE (set_body))
16457 case SET:
16458 set_dest = SET_DEST (set_body);
16459 if (!set_dest || !REG_P (set_dest))
16460 return false;
16461 break;
16462 case PARALLEL:
16463 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16464 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16465 use_body))
16466 return true;
16467 default:
16468 return false;
16469 break;
16472 /* Retrieve shift count of USE_BODY. */
16473 switch (GET_CODE (use_body))
16475 case SET:
16476 shift_rtx = XEXP (use_body, 1);
16477 break;
16478 case PARALLEL:
16479 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16480 if (ix86_dep_by_shift_count_body (set_body,
16481 XVECEXP (use_body, 0, i)))
16482 return true;
16483 default:
16484 return false;
16485 break;
16488 if (shift_rtx
16489 && (GET_CODE (shift_rtx) == ASHIFT
16490 || GET_CODE (shift_rtx) == LSHIFTRT
16491 || GET_CODE (shift_rtx) == ASHIFTRT
16492 || GET_CODE (shift_rtx) == ROTATE
16493 || GET_CODE (shift_rtx) == ROTATERT))
16495 rtx shift_count = XEXP (shift_rtx, 1);
16497 /* Return true if shift count is dest of SET_BODY. */
16498 if (REG_P (shift_count)
16499 && true_regnum (set_dest) == true_regnum (shift_count))
16500 return true;
16503 return false;
16506 /* Return true if destination reg of SET_INSN is shift count of
16507 USE_INSN. */
16509 bool
16510 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16512 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16513 PATTERN (use_insn));
16516 /* Return TRUE or FALSE depending on whether the unary operator meets the
16517 appropriate constraints. */
16519 bool
16520 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16521 enum machine_mode mode ATTRIBUTE_UNUSED,
16522 rtx operands[2] ATTRIBUTE_UNUSED)
16524 /* If one of the operands is memory, the source and destination must match. */
16525 if ((MEM_P (operands[0])
16526 || MEM_P (operands[1]))
16527 && ! rtx_equal_p (operands[0], operands[1]))
16528 return false;
16529 return true;
16532 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16533 are ok, keeping in mind the possible movddup alternative. */
16535 bool
16536 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16538 if (MEM_P (operands[0]))
16539 return rtx_equal_p (operands[0], operands[1 + high]);
16540 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16541 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16542 return true;
16545 /* Post-reload splitter for converting an SF or DFmode value in an
16546 SSE register into an unsigned SImode. */
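/* The idea, roughly: lanes whose value is >= 2**31 cannot go through the
   signed cvtt conversion directly, so compare against 2**31, conditionally
   subtract it, convert, and finally xor the 2**31 bit back into the integer
   result for the lanes that were adjusted. */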
16548 void
16549 ix86_split_convert_uns_si_sse (rtx operands[])
16551 enum machine_mode vecmode;
16552 rtx value, large, zero_or_two31, input, two31, x;
16554 large = operands[1];
16555 zero_or_two31 = operands[2];
16556 input = operands[3];
16557 two31 = operands[4];
16558 vecmode = GET_MODE (large);
16559 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16561 /* Load up the value into the low element. We must ensure that the other
16562 elements are valid floats -- zero is the easiest such value. */
16563 if (MEM_P (input))
16565 if (vecmode == V4SFmode)
16566 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16567 else
16568 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16570 else
16572 input = gen_rtx_REG (vecmode, REGNO (input));
16573 emit_move_insn (value, CONST0_RTX (vecmode));
16574 if (vecmode == V4SFmode)
16575 emit_insn (gen_sse_movss (value, value, input));
16576 else
16577 emit_insn (gen_sse2_movsd (value, value, input));
16580 emit_move_insn (large, two31);
16581 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16583 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16584 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16586 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16587 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16589 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16590 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16592 large = gen_rtx_REG (V4SImode, REGNO (large));
16593 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16595 x = gen_rtx_REG (V4SImode, REGNO (value));
16596 if (vecmode == V4SFmode)
16597 emit_insn (gen_sse2_cvttps2dq (x, value));
16598 else
16599 emit_insn (gen_sse2_cvttpd2dq (x, value));
16600 value = x;
16602 emit_insn (gen_xorv4si3 (value, value, large));
16605 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16606 Expects the 64-bit DImode to be supplied in a pair of integral
16607 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16608 -mfpmath=sse, !optimize_size only. */
16610 void
16611 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16613 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16614 rtx int_xmm, fp_xmm;
16615 rtx biases, exponents;
16616 rtx x;
16618 int_xmm = gen_reg_rtx (V4SImode);
16619 if (TARGET_INTER_UNIT_MOVES)
16620 emit_insn (gen_movdi_to_sse (int_xmm, input));
16621 else if (TARGET_SSE_SPLIT_REGS)
16623 emit_clobber (int_xmm);
16624 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16626 else
16628 x = gen_reg_rtx (V2DImode);
16629 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16630 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16633 x = gen_rtx_CONST_VECTOR (V4SImode,
16634 gen_rtvec (4, GEN_INT (0x43300000UL),
16635 GEN_INT (0x45300000UL),
16636 const0_rtx, const0_rtx));
16637 exponents = validize_mem (force_const_mem (V4SImode, x));
16639 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16640 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16642 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16643 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16644 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16645 (0x1.0p84 + double(fp_value_hi_xmm)).
16646 Note these exponents differ by 32. */
16648 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16650 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16651 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16652 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16653 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16654 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16655 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16656 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16657 biases = validize_mem (force_const_mem (V2DFmode, biases));
16658 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16660 /* Add the upper and lower DFmode values together. */
16661 if (TARGET_SSE3)
16662 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16663 else
16665 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16666 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16667 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16670 ix86_expand_vector_extract (false, target, fp_xmm, 0);
16673 /* Not used, but eases macroization of patterns. */
16674 void
16675 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16676 rtx input ATTRIBUTE_UNUSED)
16678 gcc_unreachable ();
16681 /* Convert an unsigned SImode value into a DFmode. Only currently used
16682 for SSE, but applicable anywhere. */
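/* In effect this computes (double) (int) (INPUT - 2**31) + 2**31.0: the bias
   moves the unsigned value into signed SImode range for the conversion, and
   adding 2**31.0 back in DFmode is exact since DFmode has a 53-bit mantissa. */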
16684 void
16685 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16687 REAL_VALUE_TYPE TWO31r;
16688 rtx x, fp;
16690 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16691 NULL, 1, OPTAB_DIRECT);
16693 fp = gen_reg_rtx (DFmode);
16694 emit_insn (gen_floatsidf2 (fp, x));
16696 real_ldexp (&TWO31r, &dconst1, 31);
16697 x = const_double_from_real_value (TWO31r, DFmode);
16699 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16700 if (x != target)
16701 emit_move_insn (target, x);
16704 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16705 32-bit mode; otherwise we have a direct convert instruction. */
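/* The value is split as (double) (signed high word) * 2**32
   + (double) (unsigned low word); the high word carries the sign,
   while the low word is always converted as unsigned. */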
16707 void
16708 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16710 REAL_VALUE_TYPE TWO32r;
16711 rtx fp_lo, fp_hi, x;
16713 fp_lo = gen_reg_rtx (DFmode);
16714 fp_hi = gen_reg_rtx (DFmode);
16716 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16718 real_ldexp (&TWO32r, &dconst1, 32);
16719 x = const_double_from_real_value (TWO32r, DFmode);
16720 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16722 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16724 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16725 0, OPTAB_DIRECT);
16726 if (x != target)
16727 emit_move_insn (target, x);
16730 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16731 For x86_32, -mfpmath=sse, !optimize_size only. */
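/* In effect: result = (float) (INPUT >> 16) * 0x1.0p16 + (float) (INPUT & 0xffff).
   Each 16-bit half converts to SFmode exactly and the final addition performs
   the single rounding step; a plain floatsisf cannot be used directly because
   it would treat values >= 2**31 as negative. */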
16732 void
16733 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16735 REAL_VALUE_TYPE ONE16r;
16736 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16738 real_ldexp (&ONE16r, &dconst1, 16);
16739 x = const_double_from_real_value (ONE16r, SFmode);
16740 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16741 NULL, 0, OPTAB_DIRECT);
16742 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16743 NULL, 0, OPTAB_DIRECT);
16744 fp_hi = gen_reg_rtx (SFmode);
16745 fp_lo = gen_reg_rtx (SFmode);
16746 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16747 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16748 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16749 0, OPTAB_DIRECT);
16750 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16751 0, OPTAB_DIRECT);
16752 if (!rtx_equal_p (target, fp_hi))
16753 emit_move_insn (target, fp_hi);
16756 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16757 then replicate the value for all elements of the vector
16758 register. */
16761 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16763 rtvec v;
16764 switch (mode)
16766 case V4SImode:
16767 gcc_assert (vect);
16768 v = gen_rtvec (4, value, value, value, value);
16769 return gen_rtx_CONST_VECTOR (V4SImode, v);
16771 case V2DImode:
16772 gcc_assert (vect);
16773 v = gen_rtvec (2, value, value);
16774 return gen_rtx_CONST_VECTOR (V2DImode, v);
16776 case V8SFmode:
16777 if (vect)
16778 v = gen_rtvec (8, value, value, value, value,
16779 value, value, value, value);
16780 else
16781 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16782 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16783 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16784 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16785 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16787 case V4SFmode:
16788 if (vect)
16789 v = gen_rtvec (4, value, value, value, value);
16790 else
16791 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16792 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16793 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16795 case V4DFmode:
16796 if (vect)
16797 v = gen_rtvec (4, value, value, value, value);
16798 else
16799 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16800 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16801 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16803 case V2DFmode:
16804 if (vect)
16805 v = gen_rtvec (2, value, value);
16806 else
16807 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16808 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16810 default:
16811 gcc_unreachable ();
16815 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16816 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16817 for an SSE register. If VECT is true, then replicate the mask for
16818 all elements of the vector register. If INVERT is true, then create
16819 a mask excluding the sign bit. */
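/* For example, V4SFmode with VECT true and INVERT false yields the vector
   constant { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } (as SFmode bit
   patterns) loaded into a register; with INVERT true, each element is
   0x7fffffff instead. */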
16822 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16824 enum machine_mode vec_mode, imode;
16825 HOST_WIDE_INT hi, lo;
16826 int shift = 63;
16827 rtx v;
16828 rtx mask;
16830 /* Find the sign bit, sign extended to 2*HWI. */
16831 switch (mode)
16833 case V4SImode:
16834 case V8SFmode:
16835 case V4SFmode:
16836 vec_mode = mode;
16837 mode = GET_MODE_INNER (mode);
16838 imode = SImode;
16839 lo = 0x80000000, hi = lo < 0;
16840 break;
16842 case V2DImode:
16843 case V4DFmode:
16844 case V2DFmode:
16845 vec_mode = mode;
16846 mode = GET_MODE_INNER (mode);
16847 imode = DImode;
16848 if (HOST_BITS_PER_WIDE_INT >= 64)
16849 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
16850 else
16851 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16852 break;
16854 case TImode:
16855 case TFmode:
16856 vec_mode = VOIDmode;
16857 if (HOST_BITS_PER_WIDE_INT >= 64)
16859 imode = TImode;
16860 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
16862 else
16864 rtvec vec;
16866 imode = DImode;
16867 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16869 if (invert)
16871 lo = ~lo, hi = ~hi;
16872 v = constm1_rtx;
16874 else
16875 v = const0_rtx;
16877 mask = immed_double_const (lo, hi, imode);
16879 vec = gen_rtvec (2, v, mask);
16880 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
16881 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
16883 return v;
16885 break;
16887 default:
16888 gcc_unreachable ();
16891 if (invert)
16892 lo = ~lo, hi = ~hi;
16894 /* Force this value into the low part of a fp vector constant. */
16895 mask = immed_double_const (lo, hi, imode);
16896 mask = gen_lowpart (mode, mask);
16898 if (vec_mode == VOIDmode)
16899 return force_reg (mode, mask);
16901 v = ix86_build_const_vector (vec_mode, vect, mask);
16902 return force_reg (vec_mode, v);
16905 /* Generate code for floating point ABS or NEG. */
16907 void
16908 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
16909 rtx operands[])
16911 rtx mask, set, dst, src;
16912 bool use_sse = false;
16913 bool vector_mode = VECTOR_MODE_P (mode);
16914 enum machine_mode vmode = mode;
16916 if (vector_mode)
16917 use_sse = true;
16918 else if (mode == TFmode)
16919 use_sse = true;
16920 else if (TARGET_SSE_MATH)
16922 use_sse = SSE_FLOAT_MODE_P (mode);
16923 if (mode == SFmode)
16924 vmode = V4SFmode;
16925 else if (mode == DFmode)
16926 vmode = V2DFmode;
16929 /* NEG and ABS performed with SSE use bitwise mask operations.
16930 Create the appropriate mask now. */
16931 if (use_sse)
16932 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
16933 else
16934 mask = NULL_RTX;
16936 dst = operands[0];
16937 src = operands[1];
16939 set = gen_rtx_fmt_e (code, mode, src);
16940 set = gen_rtx_SET (VOIDmode, dst, set);
16942 if (mask)
16944 rtx use, clob;
16945 rtvec par;
16947 use = gen_rtx_USE (VOIDmode, mask);
16948 if (vector_mode)
16949 par = gen_rtvec (2, set, use);
16950 else
16952 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16953 par = gen_rtvec (3, set, use, clob);
16955 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16957 else
16958 emit_insn (set);
16961 /* Expand a copysign operation. Special case operand 0 being a constant. */
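/* The underlying identity is copysign (x, y) = (x & ~SIGN_MASK) | (y & SIGN_MASK),
   carried out in the corresponding vector mode so the SSE logical operations
   can be used; the split helpers below expand it into exactly those mask
   operations. */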
16963 void
16964 ix86_expand_copysign (rtx operands[])
16966 enum machine_mode mode, vmode;
16967 rtx dest, op0, op1, mask, nmask;
16969 dest = operands[0];
16970 op0 = operands[1];
16971 op1 = operands[2];
16973 mode = GET_MODE (dest);
16975 if (mode == SFmode)
16976 vmode = V4SFmode;
16977 else if (mode == DFmode)
16978 vmode = V2DFmode;
16979 else
16980 vmode = mode;
16982 if (GET_CODE (op0) == CONST_DOUBLE)
16984 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
16986 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
16987 op0 = simplify_unary_operation (ABS, mode, op0, mode);
16989 if (mode == SFmode || mode == DFmode)
16991 if (op0 == CONST0_RTX (mode))
16992 op0 = CONST0_RTX (vmode);
16993 else
16995 rtx v = ix86_build_const_vector (vmode, false, op0);
16997 op0 = force_reg (vmode, v);
17000 else if (op0 != CONST0_RTX (mode))
17001 op0 = force_reg (mode, op0);
17003 mask = ix86_build_signbit_mask (vmode, 0, 0);
17005 if (mode == SFmode)
17006 copysign_insn = gen_copysignsf3_const;
17007 else if (mode == DFmode)
17008 copysign_insn = gen_copysigndf3_const;
17009 else
17010 copysign_insn = gen_copysigntf3_const;
17012 emit_insn (copysign_insn (dest, op0, op1, mask));
17014 else
17016 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
17018 nmask = ix86_build_signbit_mask (vmode, 0, 1);
17019 mask = ix86_build_signbit_mask (vmode, 0, 0);
17021 if (mode == SFmode)
17022 copysign_insn = gen_copysignsf3_var;
17023 else if (mode == DFmode)
17024 copysign_insn = gen_copysigndf3_var;
17025 else
17026 copysign_insn = gen_copysigntf3_var;
17028 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
17032 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17033 be a constant, and so has already been expanded into a vector constant. */
17035 void
17036 ix86_split_copysign_const (rtx operands[])
17038 enum machine_mode mode, vmode;
17039 rtx dest, op0, mask, x;
17041 dest = operands[0];
17042 op0 = operands[1];
17043 mask = operands[3];
17045 mode = GET_MODE (dest);
17046 vmode = GET_MODE (mask);
17048 dest = simplify_gen_subreg (vmode, dest, mode, 0);
17049 x = gen_rtx_AND (vmode, dest, mask);
17050 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17052 if (op0 != CONST0_RTX (vmode))
17054 x = gen_rtx_IOR (vmode, dest, op0);
17055 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17059 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17060 so we have to do two masks. */
17062 void
17063 ix86_split_copysign_var (rtx operands[])
17065 enum machine_mode mode, vmode;
17066 rtx dest, scratch, op0, op1, mask, nmask, x;
17068 dest = operands[0];
17069 scratch = operands[1];
17070 op0 = operands[2];
17071 op1 = operands[3];
17072 nmask = operands[4];
17073 mask = operands[5];
17075 mode = GET_MODE (dest);
17076 vmode = GET_MODE (mask);
17078 if (rtx_equal_p (op0, op1))
17080 /* Shouldn't happen often (it's useless, obviously), but when it does
17081 we'd generate incorrect code if we continue below. */
17082 emit_move_insn (dest, op0);
17083 return;
17086 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
17088 gcc_assert (REGNO (op1) == REGNO (scratch));
17090 x = gen_rtx_AND (vmode, scratch, mask);
17091 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17093 dest = mask;
17094 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17095 x = gen_rtx_NOT (vmode, dest);
17096 x = gen_rtx_AND (vmode, x, op0);
17097 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17099 else
17101 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
17103 x = gen_rtx_AND (vmode, scratch, mask);
17105 else /* alternative 2,4 */
17107 gcc_assert (REGNO (mask) == REGNO (scratch));
17108 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
17109 x = gen_rtx_AND (vmode, scratch, op1);
17111 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17113 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
17115 dest = simplify_gen_subreg (vmode, op0, mode, 0);
17116 x = gen_rtx_AND (vmode, dest, nmask);
17118 else /* alternative 3,4 */
17120 gcc_assert (REGNO (nmask) == REGNO (dest));
17121 dest = nmask;
17122 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17123 x = gen_rtx_AND (vmode, dest, op0);
17125 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17128 x = gen_rtx_IOR (vmode, dest, scratch);
17129 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17132 /* Return TRUE or FALSE depending on whether the first SET in INSN
17133 has source and destination with matching CC modes, and that the
17134 CC mode is at least as constrained as REQ_MODE. */
17136 bool
17137 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
17139 rtx set;
17140 enum machine_mode set_mode;
17142 set = PATTERN (insn);
17143 if (GET_CODE (set) == PARALLEL)
17144 set = XVECEXP (set, 0, 0);
17145 gcc_assert (GET_CODE (set) == SET);
17146 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
17148 set_mode = GET_MODE (SET_DEST (set));
17149 switch (set_mode)
17151 case CCNOmode:
17152 if (req_mode != CCNOmode
17153 && (req_mode != CCmode
17154 || XEXP (SET_SRC (set), 1) != const0_rtx))
17155 return false;
17156 break;
17157 case CCmode:
17158 if (req_mode == CCGCmode)
17159 return false;
17160 /* FALLTHRU */
17161 case CCGCmode:
17162 if (req_mode == CCGOCmode || req_mode == CCNOmode)
17163 return false;
17164 /* FALLTHRU */
17165 case CCGOCmode:
17166 if (req_mode == CCZmode)
17167 return false;
17168 /* FALLTHRU */
17169 case CCAmode:
17170 case CCCmode:
17171 case CCOmode:
17172 case CCSmode:
17173 case CCZmode:
17174 break;
17176 default:
17177 gcc_unreachable ();
17180 return GET_MODE (SET_SRC (set)) == set_mode;
17183 /* Generate insn patterns to do an integer compare of OPERANDS. */
17185 static rtx
17186 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
17188 enum machine_mode cmpmode;
17189 rtx tmp, flags;
17191 cmpmode = SELECT_CC_MODE (code, op0, op1);
17192 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
17194 /* This is very simple, but making the interface the same as in the
17195 FP case makes the rest of the code easier. */
17196 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
17197 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
17199 /* Return the test that should be put into the flags user, i.e.
17200 the bcc, scc, or cmov instruction. */
17201 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
17204 /* Figure out whether to use ordered or unordered fp comparisons.
17205 Return the appropriate mode to use. */
17207 enum machine_mode
17208 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
17210 /* ??? In order to make all comparisons reversible, we do all comparisons
17211 non-trapping when compiling for IEEE. Once gcc is able to distinguish
17212 all forms of trapping and nontrapping comparisons, we can make inequality
17213 comparisons trapping again, since it results in better code when using
17214 FCOM based compares. */
17215 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
17218 enum machine_mode
17219 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17221 enum machine_mode mode = GET_MODE (op0);
17223 if (SCALAR_FLOAT_MODE_P (mode))
17225 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17226 return ix86_fp_compare_mode (code);
17229 switch (code)
17231 /* Only zero flag is needed. */
17232 case EQ: /* ZF=0 */
17233 case NE: /* ZF!=0 */
17234 return CCZmode;
17235 /* Codes needing carry flag. */
17236 case GEU: /* CF=0 */
17237 case LTU: /* CF=1 */
17238 /* Detect overflow checks. They need just the carry flag. */
17239 if (GET_CODE (op0) == PLUS
17240 && rtx_equal_p (op1, XEXP (op0, 0)))
17241 return CCCmode;
17242 else
17243 return CCmode;
17244 case GTU: /* CF=0 & ZF=0 */
17245 case LEU: /* CF=1 | ZF=1 */
17246 /* Detect overflow checks. They need just the carry flag. */
17247 if (GET_CODE (op0) == MINUS
17248 && rtx_equal_p (op1, XEXP (op0, 0)))
17249 return CCCmode;
17250 else
17251 return CCmode;
17252 /* Codes possibly doable only with sign flag when
17253 comparing against zero. */
17254 case GE: /* SF=OF or SF=0 */
17255 case LT: /* SF<>OF or SF=1 */
17256 if (op1 == const0_rtx)
17257 return CCGOCmode;
17258 else
17259 /* For other cases Carry flag is not required. */
17260 return CCGCmode;
17261 /* Codes doable only with the sign flag when comparing
17262 against zero, but for which we lack a jump instruction,
17263 so we need to use relational tests against overflow,
17264 which therefore needs to be zero. */
17265 case GT: /* ZF=0 & SF=OF */
17266 case LE: /* ZF=1 | SF<>OF */
17267 if (op1 == const0_rtx)
17268 return CCNOmode;
17269 else
17270 return CCGCmode;
17271 /* The strcmp pattern does (use flags) and combine may ask us for the
17272 proper mode. */
17273 case USE:
17274 return CCmode;
17275 default:
17276 gcc_unreachable ();
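/* Illustrative examples, not part of the original source: per the switch
above, (eq a b) only needs ZF and selects CCZmode; a signed (lt a 0)
selects CCGOCmode; and an unsigned (ltu (plus a b) a) is recognized as
an overflow check that only needs CF, so it selects CCCmode. */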
17280 /* Return the fixed registers used for condition codes. */
17282 static bool
17283 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17285 *p1 = FLAGS_REG;
17286 *p2 = FPSR_REG;
17287 return true;
17290 /* If two condition code modes are compatible, return a condition code
17291 mode which is compatible with both. Otherwise, return
17292 VOIDmode. */
17294 static enum machine_mode
17295 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17297 if (m1 == m2)
17298 return m1;
17300 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17301 return VOIDmode;
17303 if ((m1 == CCGCmode && m2 == CCGOCmode)
17304 || (m1 == CCGOCmode && m2 == CCGCmode))
17305 return CCGCmode;
17307 switch (m1)
17309 default:
17310 gcc_unreachable ();
17312 case CCmode:
17313 case CCGCmode:
17314 case CCGOCmode:
17315 case CCNOmode:
17316 case CCAmode:
17317 case CCCmode:
17318 case CCOmode:
17319 case CCSmode:
17320 case CCZmode:
17321 switch (m2)
17323 default:
17324 return VOIDmode;
17326 case CCmode:
17327 case CCGCmode:
17328 case CCGOCmode:
17329 case CCNOmode:
17330 case CCAmode:
17331 case CCCmode:
17332 case CCOmode:
17333 case CCSmode:
17334 case CCZmode:
17335 return CCmode;
17338 case CCFPmode:
17339 case CCFPUmode:
17340 /* These are only compatible with themselves, which we already
17341 checked above. */
17342 return VOIDmode;
17347 /* Return a comparison we can do that is equivalent to
17348 swap_condition (code), except possibly for orderedness.
17349 Never change orderedness if TARGET_IEEE_FP, returning
17350 UNKNOWN in that case if necessary. */
17352 static enum rtx_code
17353 ix86_fp_swap_condition (enum rtx_code code)
17355 switch (code)
17357 case GT: /* GTU - CF=0 & ZF=0 */
17358 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17359 case GE: /* GEU - CF=0 */
17360 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17361 case UNLT: /* LTU - CF=1 */
17362 return TARGET_IEEE_FP ? UNKNOWN : GT;
17363 case UNLE: /* LEU - CF=1 | ZF=1 */
17364 return TARGET_IEEE_FP ? UNKNOWN : GE;
17365 default:
17366 return swap_condition (code);
17370 /* Return the cost of comparison CODE using the best strategy for performance.
17371 All of the following functions use the number of instructions as the cost metric.
17372 In the future this should be tweaked to compute bytes for optimize_size and
17373 to take into account the performance of various instructions on various CPUs. */
17375 static int
17376 ix86_fp_comparison_cost (enum rtx_code code)
17378 int arith_cost;
17380 /* The cost of code using bit-twiddling on %ah. */
17381 switch (code)
17383 case UNLE:
17384 case UNLT:
17385 case LTGT:
17386 case GT:
17387 case GE:
17388 case UNORDERED:
17389 case ORDERED:
17390 case UNEQ:
17391 arith_cost = 4;
17392 break;
17393 case LT:
17394 case NE:
17395 case EQ:
17396 case UNGE:
17397 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17398 break;
17399 case LE:
17400 case UNGT:
17401 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17402 break;
17403 default:
17404 gcc_unreachable ();
17407 switch (ix86_fp_comparison_strategy (code))
17409 case IX86_FPCMP_COMI:
17410 return arith_cost > 4 ? 3 : 2;
17411 case IX86_FPCMP_SAHF:
17412 return arith_cost > 4 ? 4 : 3;
17413 default:
17414 return arith_cost;
17418 /* Return the strategy to use for floating-point comparisons. We assume that
17419 fcomi is always preferable where available, since that is also true when looking
17420 at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17422 enum ix86_fpcmp_strategy
17423 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17425 /* Do fcomi/sahf based test when profitable. */
17427 if (TARGET_CMOVE)
17428 return IX86_FPCMP_COMI;
17430 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17431 return IX86_FPCMP_SAHF;
17433 return IX86_FPCMP_ARITH;
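/* Background note, an addition to the original comments: the three
strategies correspond to the usual x87 sequences -- IX86_FPCMP_COMI is
a single fcomi/fucomi that writes ZF/PF/CF directly, IX86_FPCMP_SAHF is
roughly fucom + fnstsw %ax + sahf, and IX86_FPCMP_ARITH is fnstsw %ax
followed by the test/and tricks on %ah done in ix86_expand_fp_compare
below. */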
17436 /* Swap, force into registers, or otherwise massage the two operands
17437 to a fp comparison. The operands are updated in place; the new
17438 comparison code is returned. */
17440 static enum rtx_code
17441 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17443 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17444 rtx op0 = *pop0, op1 = *pop1;
17445 enum machine_mode op_mode = GET_MODE (op0);
17446 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17448 /* All of the unordered compare instructions only work on registers.
17449 The same is true of the fcomi compare instructions. The XFmode
17450 compare instructions require registers except when comparing
17451 against zero or when converting operand 1 from fixed point to
17452 floating point. */
17454 if (!is_sse
17455 && (fpcmp_mode == CCFPUmode
17456 || (op_mode == XFmode
17457 && ! (standard_80387_constant_p (op0) == 1
17458 || standard_80387_constant_p (op1) == 1)
17459 && GET_CODE (op1) != FLOAT)
17460 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17462 op0 = force_reg (op_mode, op0);
17463 op1 = force_reg (op_mode, op1);
17465 else
17467 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17468 things around if they appear profitable, otherwise force op0
17469 into a register. */
17471 if (standard_80387_constant_p (op0) == 0
17472 || (MEM_P (op0)
17473 && ! (standard_80387_constant_p (op1) == 0
17474 || MEM_P (op1))))
17476 enum rtx_code new_code = ix86_fp_swap_condition (code);
17477 if (new_code != UNKNOWN)
17479 rtx tmp;
17480 tmp = op0, op0 = op1, op1 = tmp;
17481 code = new_code;
17485 if (!REG_P (op0))
17486 op0 = force_reg (op_mode, op0);
17488 if (CONSTANT_P (op1))
17490 int tmp = standard_80387_constant_p (op1);
17491 if (tmp == 0)
17492 op1 = validize_mem (force_const_mem (op_mode, op1));
17493 else if (tmp == 1)
17495 if (TARGET_CMOVE)
17496 op1 = force_reg (op_mode, op1);
17498 else
17499 op1 = force_reg (op_mode, op1);
17503 /* Try to rearrange the comparison to make it cheaper. */
17504 if (ix86_fp_comparison_cost (code)
17505 > ix86_fp_comparison_cost (swap_condition (code))
17506 && (REG_P (op1) || can_create_pseudo_p ()))
17508 rtx tmp;
17509 tmp = op0, op0 = op1, op1 = tmp;
17510 code = swap_condition (code);
17511 if (!REG_P (op0))
17512 op0 = force_reg (op_mode, op0);
17515 *pop0 = op0;
17516 *pop1 = op1;
17517 return code;
17520 /* Convert the comparison code we use to represent an FP comparison to the
17521 integer code that will result in a proper branch. Return UNKNOWN if no such code
17522 is available. */
17524 enum rtx_code
17525 ix86_fp_compare_code_to_integer (enum rtx_code code)
17527 switch (code)
17529 case GT:
17530 return GTU;
17531 case GE:
17532 return GEU;
17533 case ORDERED:
17534 case UNORDERED:
17535 return code;
17536 break;
17537 case UNEQ:
17538 return EQ;
17539 break;
17540 case UNLT:
17541 return LTU;
17542 break;
17543 case UNLE:
17544 return LEU;
17545 break;
17546 case LTGT:
17547 return NE;
17548 break;
17549 default:
17550 return UNKNOWN;
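/* Clarifying note, not part of the original source: the mapping above
(GT -> GTU, GE -> GEU, UNLT -> LTU, ...) works because fcomi/sahf leave
CF and ZF set the same way an unsigned integer comparison would, with PF
flagging the unordered case, so the ordered FP codes correspond to their
unsigned integer counterparts on the flags register. */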
17554 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17556 static rtx
17557 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17559 enum machine_mode fpcmp_mode, intcmp_mode;
17560 rtx tmp, tmp2;
17562 fpcmp_mode = ix86_fp_compare_mode (code);
17563 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17565 /* Do fcomi/sahf based test when profitable. */
17566 switch (ix86_fp_comparison_strategy (code))
17568 case IX86_FPCMP_COMI:
17569 intcmp_mode = fpcmp_mode;
17570 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17571 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17572 tmp);
17573 emit_insn (tmp);
17574 break;
17576 case IX86_FPCMP_SAHF:
17577 intcmp_mode = fpcmp_mode;
17578 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17579 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17580 tmp);
17582 if (!scratch)
17583 scratch = gen_reg_rtx (HImode);
17584 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17585 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17586 break;
17588 case IX86_FPCMP_ARITH:
17589 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17590 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17591 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17592 if (!scratch)
17593 scratch = gen_reg_rtx (HImode);
17594 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17596 /* In the unordered case, we have to check C2 for NaNs, which
17597 doesn't happen to work out to anything nice combination-wise.
17598 So do some bit twiddling on the value we've got in AH to come
17599 up with an appropriate set of condition codes. */
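/* Added explanatory note, not part of the original source: after fnstsw,
%ah holds C0 in bit 0, C2 in bit 2 and C3 in bit 6 of the FP status
word, so the masks used below decode as 0x01 = C0, 0x04 = C2
(unordered), 0x40 = C3 (equal), 0x05 = C0|C2 and 0x45 = C0|C2|C3. */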
17601 intcmp_mode = CCNOmode;
17602 switch (code)
17604 case GT:
17605 case UNGT:
17606 if (code == GT || !TARGET_IEEE_FP)
17608 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17609 code = EQ;
17611 else
17613 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17614 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17615 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17616 intcmp_mode = CCmode;
17617 code = GEU;
17619 break;
17620 case LT:
17621 case UNLT:
17622 if (code == LT && TARGET_IEEE_FP)
17624 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17625 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17626 intcmp_mode = CCmode;
17627 code = EQ;
17629 else
17631 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17632 code = NE;
17634 break;
17635 case GE:
17636 case UNGE:
17637 if (code == GE || !TARGET_IEEE_FP)
17639 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17640 code = EQ;
17642 else
17644 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17645 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17646 code = NE;
17648 break;
17649 case LE:
17650 case UNLE:
17651 if (code == LE && TARGET_IEEE_FP)
17653 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17654 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17655 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17656 intcmp_mode = CCmode;
17657 code = LTU;
17659 else
17661 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17662 code = NE;
17664 break;
17665 case EQ:
17666 case UNEQ:
17667 if (code == EQ && TARGET_IEEE_FP)
17669 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17670 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17671 intcmp_mode = CCmode;
17672 code = EQ;
17674 else
17676 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17677 code = NE;
17679 break;
17680 case NE:
17681 case LTGT:
17682 if (code == NE && TARGET_IEEE_FP)
17684 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17685 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17686 GEN_INT (0x40)));
17687 code = NE;
17689 else
17691 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17692 code = EQ;
17694 break;
17696 case UNORDERED:
17697 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17698 code = NE;
17699 break;
17700 case ORDERED:
17701 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17702 code = EQ;
17703 break;
17705 default:
17706 gcc_unreachable ();
17708 break;
17710 default:
17711 gcc_unreachable();
17714 /* Return the test that should be put into the flags user, i.e.
17715 the bcc, scc, or cmov instruction. */
17716 return gen_rtx_fmt_ee (code, VOIDmode,
17717 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17718 const0_rtx);
17721 static rtx
17722 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17724 rtx ret;
17726 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17727 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17729 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17731 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17732 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17734 else
17735 ret = ix86_expand_int_compare (code, op0, op1);
17737 return ret;
17740 void
17741 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17743 enum machine_mode mode = GET_MODE (op0);
17744 rtx tmp;
17746 switch (mode)
17748 case SFmode:
17749 case DFmode:
17750 case XFmode:
17751 case QImode:
17752 case HImode:
17753 case SImode:
17754 simple:
17755 tmp = ix86_expand_compare (code, op0, op1);
17756 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17757 gen_rtx_LABEL_REF (VOIDmode, label),
17758 pc_rtx);
17759 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17760 return;
17762 case DImode:
17763 if (TARGET_64BIT)
17764 goto simple;
17765 case TImode:
17766 /* Expand DImode branch into multiple compare+branch. */
17768 rtx lo[2], hi[2], label2;
17769 enum rtx_code code1, code2, code3;
17770 enum machine_mode submode;
17772 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17774 tmp = op0, op0 = op1, op1 = tmp;
17775 code = swap_condition (code);
17778 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17779 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17781 submode = mode == DImode ? SImode : DImode;
17783 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17784 avoid two branches. This costs one extra insn, so disable when
17785 optimizing for size. */
17787 if ((code == EQ || code == NE)
17788 && (!optimize_insn_for_size_p ()
17789 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17791 rtx xor0, xor1;
17793 xor1 = hi[0];
17794 if (hi[1] != const0_rtx)
17795 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17796 NULL_RTX, 0, OPTAB_WIDEN);
17798 xor0 = lo[0];
17799 if (lo[1] != const0_rtx)
17800 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17801 NULL_RTX, 0, OPTAB_WIDEN);
17803 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17804 NULL_RTX, 0, OPTAB_WIDEN);
17806 ix86_expand_branch (code, tmp, const0_rtx, label);
17807 return;
17810 /* Otherwise, if we are doing less-than or greater-or-equal,
17811 op1 is a constant and the low word is zero, then we can just
17812 examine the high word. Similarly for a low word of -1 and
17813 less-or-equal or greater-than. */
17815 if (CONST_INT_P (hi[1]))
17816 switch (code)
17818 case LT: case LTU: case GE: case GEU:
17819 if (lo[1] == const0_rtx)
17821 ix86_expand_branch (code, hi[0], hi[1], label);
17822 return;
17824 break;
17825 case LE: case LEU: case GT: case GTU:
17826 if (lo[1] == constm1_rtx)
17828 ix86_expand_branch (code, hi[0], hi[1], label);
17829 return;
17831 break;
17832 default:
17833 break;
17836 /* Otherwise, we need two or three jumps. */
17838 label2 = gen_label_rtx ();
17840 code1 = code;
17841 code2 = swap_condition (code);
17842 code3 = unsigned_condition (code);
17844 switch (code)
17846 case LT: case GT: case LTU: case GTU:
17847 break;
17849 case LE: code1 = LT; code2 = GT; break;
17850 case GE: code1 = GT; code2 = LT; break;
17851 case LEU: code1 = LTU; code2 = GTU; break;
17852 case GEU: code1 = GTU; code2 = LTU; break;
17854 case EQ: code1 = UNKNOWN; code2 = NE; break;
17855 case NE: code2 = UNKNOWN; break;
17857 default:
17858 gcc_unreachable ();
17862 * a < b =>
17863 * if (hi(a) < hi(b)) goto true;
17864 * if (hi(a) > hi(b)) goto false;
17865 * if (lo(a) < lo(b)) goto true;
17866 * false:
17869 if (code1 != UNKNOWN)
17870 ix86_expand_branch (code1, hi[0], hi[1], label);
17871 if (code2 != UNKNOWN)
17872 ix86_expand_branch (code2, hi[0], hi[1], label2);
17874 ix86_expand_branch (code3, lo[0], lo[1], label);
17876 if (code2 != UNKNOWN)
17877 emit_label (label2);
17878 return;
17881 default:
17882 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
17883 goto simple;
17887 /* Split branch based on floating point condition. */
17888 void
17889 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
17890 rtx target1, rtx target2, rtx tmp, rtx pushed)
17892 rtx condition;
17893 rtx i;
17895 if (target2 != pc_rtx)
17897 rtx tmp = target2;
17898 code = reverse_condition_maybe_unordered (code);
17899 target2 = target1;
17900 target1 = tmp;
17903 condition = ix86_expand_fp_compare (code, op1, op2,
17904 tmp);
17906 /* Remove pushed operand from stack. */
17907 if (pushed)
17908 ix86_free_from_memory (GET_MODE (pushed));
17910 i = emit_jump_insn (gen_rtx_SET
17911 (VOIDmode, pc_rtx,
17912 gen_rtx_IF_THEN_ELSE (VOIDmode,
17913 condition, target1, target2)));
17914 if (split_branch_probability >= 0)
17915 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
17918 void
17919 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
17921 rtx ret;
17923 gcc_assert (GET_MODE (dest) == QImode);
17925 ret = ix86_expand_compare (code, op0, op1);
17926 PUT_MODE (ret, QImode);
17927 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
17930 /* Expand a comparison setting or clearing the carry flag. Return true
17931 when successful, storing the comparison in *POP. */
17932 static bool
17933 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
17935 enum machine_mode mode =
17936 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
17938 /* Do not handle double-mode compares that go through the special path. */
17939 if (mode == (TARGET_64BIT ? TImode : DImode))
17940 return false;
17942 if (SCALAR_FLOAT_MODE_P (mode))
17944 rtx compare_op, compare_seq;
17946 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17948 /* Shortcut: the following common codes never translate
17949 into carry flag compares. */
17950 if (code == EQ || code == NE || code == UNEQ || code == LTGT
17951 || code == ORDERED || code == UNORDERED)
17952 return false;
17954 /* These comparisons require the zero flag; swap the operands so they no longer do. */
17955 if ((code == GT || code == UNLE || code == LE || code == UNGT)
17956 && !TARGET_IEEE_FP)
17958 rtx tmp = op0;
17959 op0 = op1;
17960 op1 = tmp;
17961 code = swap_condition (code);
17964 /* Try to expand the comparison and verify that we end up with
17965 a carry-flag-based comparison. This fails to be true only when
17966 we decide to expand the comparison using arithmetic, which is
17967 not a very common scenario. */
17968 start_sequence ();
17969 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17970 compare_seq = get_insns ();
17971 end_sequence ();
17973 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
17974 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
17975 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
17976 else
17977 code = GET_CODE (compare_op);
17979 if (code != LTU && code != GEU)
17980 return false;
17982 emit_insn (compare_seq);
17983 *pop = compare_op;
17984 return true;
17987 if (!INTEGRAL_MODE_P (mode))
17988 return false;
17990 switch (code)
17992 case LTU:
17993 case GEU:
17994 break;
17996 /* Convert a==0 into (unsigned)a<1. */
17997 case EQ:
17998 case NE:
17999 if (op1 != const0_rtx)
18000 return false;
18001 op1 = const1_rtx;
18002 code = (code == EQ ? LTU : GEU);
18003 break;
18005 /* Convert a>b into b<a or a>=b+1. */
18006 case GTU:
18007 case LEU:
18008 if (CONST_INT_P (op1))
18010 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
18011 /* Bail out on overflow. We still can swap operands but that
18012 would force loading of the constant into a register. */
18013 if (op1 == const0_rtx
18014 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
18015 return false;
18016 code = (code == GTU ? GEU : LTU);
18018 else
18020 rtx tmp = op1;
18021 op1 = op0;
18022 op0 = tmp;
18023 code = (code == GTU ? LTU : GEU);
18025 break;
18027 /* Convert a>=0 into (unsigned)a<0x80000000. */
18028 case LT:
18029 case GE:
18030 if (mode == DImode || op1 != const0_rtx)
18031 return false;
18032 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18033 code = (code == LT ? GEU : LTU);
18034 break;
18035 case LE:
18036 case GT:
18037 if (mode == DImode || op1 != constm1_rtx)
18038 return false;
18039 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18040 code = (code == LE ? GEU : LTU);
18041 break;
18043 default:
18044 return false;
18046 /* Swapping operands may cause a constant to appear as the first operand. */
18047 if (!nonimmediate_operand (op0, VOIDmode))
18049 if (!can_create_pseudo_p ())
18050 return false;
18051 op0 = force_reg (mode, op0);
18053 *pop = ix86_expand_compare (code, op0, op1);
18054 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
18055 return true;
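/* Illustrative example, not part of the original source (register choices
are arbitrary): with the EQ -> LTU conversion above, "x = (a == 0) ? -1 : 0"
for unsigned a can be emitted branch-free as
cmpl $1, %eax ; CF is set iff a < 1, i.e. a == 0
sbbl %edx, %edx ; %edx = CF ? -1 : 0
which is the carry-flag idiom the movcc/addcc expanders below build on. */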
18058 bool
18059 ix86_expand_int_movcc (rtx operands[])
18061 enum rtx_code code = GET_CODE (operands[1]), compare_code;
18062 rtx compare_seq, compare_op;
18063 enum machine_mode mode = GET_MODE (operands[0]);
18064 bool sign_bit_compare_p = false;
18065 rtx op0 = XEXP (operands[1], 0);
18066 rtx op1 = XEXP (operands[1], 1);
18068 start_sequence ();
18069 compare_op = ix86_expand_compare (code, op0, op1);
18070 compare_seq = get_insns ();
18071 end_sequence ();
18073 compare_code = GET_CODE (compare_op);
18075 if ((op1 == const0_rtx && (code == GE || code == LT))
18076 || (op1 == constm1_rtx && (code == GT || code == LE)))
18077 sign_bit_compare_p = true;
18079 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18080 HImode insns, we'd be swallowed in word prefix ops. */
18082 if ((mode != HImode || TARGET_FAST_PREFIX)
18083 && (mode != (TARGET_64BIT ? TImode : DImode))
18084 && CONST_INT_P (operands[2])
18085 && CONST_INT_P (operands[3]))
18087 rtx out = operands[0];
18088 HOST_WIDE_INT ct = INTVAL (operands[2]);
18089 HOST_WIDE_INT cf = INTVAL (operands[3]);
18090 HOST_WIDE_INT diff;
18092 diff = ct - cf;
18093 /* Sign bit compares are better done using shifts than by using
18094 sbb. */
18095 if (sign_bit_compare_p
18096 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18098 /* Detect overlap between destination and compare sources. */
18099 rtx tmp = out;
18101 if (!sign_bit_compare_p)
18103 rtx flags;
18104 bool fpcmp = false;
18106 compare_code = GET_CODE (compare_op);
18108 flags = XEXP (compare_op, 0);
18110 if (GET_MODE (flags) == CCFPmode
18111 || GET_MODE (flags) == CCFPUmode)
18113 fpcmp = true;
18114 compare_code
18115 = ix86_fp_compare_code_to_integer (compare_code);
18118 /* To simplify the rest of the code, restrict to the GEU case. */
18119 if (compare_code == LTU)
18121 HOST_WIDE_INT tmp = ct;
18122 ct = cf;
18123 cf = tmp;
18124 compare_code = reverse_condition (compare_code);
18125 code = reverse_condition (code);
18127 else
18129 if (fpcmp)
18130 PUT_CODE (compare_op,
18131 reverse_condition_maybe_unordered
18132 (GET_CODE (compare_op)));
18133 else
18134 PUT_CODE (compare_op,
18135 reverse_condition (GET_CODE (compare_op)));
18137 diff = ct - cf;
18139 if (reg_overlap_mentioned_p (out, op0)
18140 || reg_overlap_mentioned_p (out, op1))
18141 tmp = gen_reg_rtx (mode);
18143 if (mode == DImode)
18144 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
18145 else
18146 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
18147 flags, compare_op));
18149 else
18151 if (code == GT || code == GE)
18152 code = reverse_condition (code);
18153 else
18155 HOST_WIDE_INT tmp = ct;
18156 ct = cf;
18157 cf = tmp;
18158 diff = ct - cf;
18160 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
18163 if (diff == 1)
18166 * cmpl op0,op1
18167 * sbbl dest,dest
18168 * [addl dest, ct]
18170 * Size 5 - 8.
18172 if (ct)
18173 tmp = expand_simple_binop (mode, PLUS,
18174 tmp, GEN_INT (ct),
18175 copy_rtx (tmp), 1, OPTAB_DIRECT);
18177 else if (cf == -1)
18180 * cmpl op0,op1
18181 * sbbl dest,dest
18182 * orl $ct, dest
18184 * Size 8.
18186 tmp = expand_simple_binop (mode, IOR,
18187 tmp, GEN_INT (ct),
18188 copy_rtx (tmp), 1, OPTAB_DIRECT);
18190 else if (diff == -1 && ct)
18193 * cmpl op0,op1
18194 * sbbl dest,dest
18195 * notl dest
18196 * [addl dest, cf]
18198 * Size 8 - 11.
18200 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18201 if (cf)
18202 tmp = expand_simple_binop (mode, PLUS,
18203 copy_rtx (tmp), GEN_INT (cf),
18204 copy_rtx (tmp), 1, OPTAB_DIRECT);
18206 else
18209 * cmpl op0,op1
18210 * sbbl dest,dest
18211 * [notl dest]
18212 * andl cf - ct, dest
18213 * [addl dest, ct]
18215 * Size 8 - 11.
18218 if (cf == 0)
18220 cf = ct;
18221 ct = 0;
18222 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18225 tmp = expand_simple_binop (mode, AND,
18226 copy_rtx (tmp),
18227 gen_int_mode (cf - ct, mode),
18228 copy_rtx (tmp), 1, OPTAB_DIRECT);
18229 if (ct)
18230 tmp = expand_simple_binop (mode, PLUS,
18231 copy_rtx (tmp), GEN_INT (ct),
18232 copy_rtx (tmp), 1, OPTAB_DIRECT);
18235 if (!rtx_equal_p (tmp, out))
18236 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
18238 return true;
18241 if (diff < 0)
18243 enum machine_mode cmp_mode = GET_MODE (op0);
18245 HOST_WIDE_INT tmp;
18246 tmp = ct, ct = cf, cf = tmp;
18247 diff = -diff;
18249 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18251 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18253 /* We may be reversing an unordered compare to a normal compare, which
18254 is not valid in general (we may convert a non-trapping condition
18255 to a trapping one); however, on i386 we currently emit all
18256 comparisons unordered. */
18257 compare_code = reverse_condition_maybe_unordered (compare_code);
18258 code = reverse_condition_maybe_unordered (code);
18260 else
18262 compare_code = reverse_condition (compare_code);
18263 code = reverse_condition (code);
18267 compare_code = UNKNOWN;
18268 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18269 && CONST_INT_P (op1))
18271 if (op1 == const0_rtx
18272 && (code == LT || code == GE))
18273 compare_code = code;
18274 else if (op1 == constm1_rtx)
18276 if (code == LE)
18277 compare_code = LT;
18278 else if (code == GT)
18279 compare_code = GE;
18283 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18284 if (compare_code != UNKNOWN
18285 && GET_MODE (op0) == GET_MODE (out)
18286 && (cf == -1 || ct == -1))
18288 /* If lea code below could be used, only optimize
18289 if it results in a 2 insn sequence. */
18291 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18292 || diff == 3 || diff == 5 || diff == 9)
18293 || (compare_code == LT && ct == -1)
18294 || (compare_code == GE && cf == -1))
18297 * notl op1 (if necessary)
18298 * sarl $31, op1
18299 * orl cf, op1
18301 if (ct != -1)
18303 cf = ct;
18304 ct = -1;
18305 code = reverse_condition (code);
18308 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18310 out = expand_simple_binop (mode, IOR,
18311 out, GEN_INT (cf),
18312 out, 1, OPTAB_DIRECT);
18313 if (out != operands[0])
18314 emit_move_insn (operands[0], out);
18316 return true;
18321 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18322 || diff == 3 || diff == 5 || diff == 9)
18323 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18324 && (mode != DImode
18325 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18328 * xorl dest,dest
18329 * cmpl op1,op2
18330 * setcc dest
18331 * lea cf(dest*(ct-cf)),dest
18333 * Size 14.
18335 * This also catches the degenerate setcc-only case.
18338 rtx tmp;
18339 int nops;
18341 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18343 nops = 0;
18344 /* On x86_64 the lea instruction operates on Pmode, so we need
18345 to get the arithmetic done in the proper mode to match. */
18346 if (diff == 1)
18347 tmp = copy_rtx (out);
18348 else
18350 rtx out1;
18351 out1 = copy_rtx (out);
18352 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18353 nops++;
18354 if (diff & 1)
18356 tmp = gen_rtx_PLUS (mode, tmp, out1);
18357 nops++;
18360 if (cf != 0)
18362 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18363 nops++;
18365 if (!rtx_equal_p (tmp, out))
18367 if (nops == 1)
18368 out = force_operand (tmp, copy_rtx (out));
18369 else
18370 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18372 if (!rtx_equal_p (out, operands[0]))
18373 emit_move_insn (operands[0], copy_rtx (out));
18375 return true;
18379 * General case:                    Jumpful:
18380 *   xorl dest,dest                 cmpl op1, op2
18381 *   cmpl op1, op2                  movl ct, dest
18382 *   setcc dest                     jcc 1f
18383 *   decl dest                      movl cf, dest
18384 *   andl (cf-ct),dest              1:
18385 *   addl ct,dest
18387 * Size 20.                         Size 14.
18389 * This is reasonably steep, but branch mispredict costs are
18390 * high on modern CPUs, so consider failing only if optimizing
18391 * for space.
18394 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18395 && BRANCH_COST (optimize_insn_for_speed_p (),
18396 false) >= 2)
18398 if (cf == 0)
18400 enum machine_mode cmp_mode = GET_MODE (op0);
18402 cf = ct;
18403 ct = 0;
18405 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18407 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18409 /* We may be reversing an unordered compare to a normal compare,
18410 which is not valid in general (we may convert a non-trapping
18411 condition to a trapping one); however, on i386 we currently
18412 emit all comparisons unordered. */
18413 code = reverse_condition_maybe_unordered (code);
18415 else
18417 code = reverse_condition (code);
18418 if (compare_code != UNKNOWN)
18419 compare_code = reverse_condition (compare_code);
18423 if (compare_code != UNKNOWN)
18425 /* notl op1 (if needed)
18426 sarl $31, op1
18427 andl (cf-ct), op1
18428 addl ct, op1
18430 For x < 0 (resp. x <= -1) there will be no notl,
18431 so if possible swap the constants to get rid of the
18432 complement.
18433 True/false will be -1/0 while code below (store flag
18434 followed by decrement) is 0/-1, so the constants need
18435 to be exchanged once more. */
18437 if (compare_code == GE || !cf)
18439 code = reverse_condition (code);
18440 compare_code = LT;
18442 else
18444 HOST_WIDE_INT tmp = cf;
18445 cf = ct;
18446 ct = tmp;
18449 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18451 else
18453 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18455 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18456 constm1_rtx,
18457 copy_rtx (out), 1, OPTAB_DIRECT);
18460 out = expand_simple_binop (mode, AND, copy_rtx (out),
18461 gen_int_mode (cf - ct, mode),
18462 copy_rtx (out), 1, OPTAB_DIRECT);
18463 if (ct)
18464 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18465 copy_rtx (out), 1, OPTAB_DIRECT);
18466 if (!rtx_equal_p (out, operands[0]))
18467 emit_move_insn (operands[0], copy_rtx (out));
18469 return true;
18473 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18475 /* Try a few things more with specific constants and a variable. */
18477 optab op;
18478 rtx var, orig_out, out, tmp;
18480 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18481 return false;
18483 /* If one of the two operands is an interesting constant, load a
18484 constant with the above and mask it in with a logical operation. */
18486 if (CONST_INT_P (operands[2]))
18488 var = operands[3];
18489 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18490 operands[3] = constm1_rtx, op = and_optab;
18491 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18492 operands[3] = const0_rtx, op = ior_optab;
18493 else
18494 return false;
18496 else if (CONST_INT_P (operands[3]))
18498 var = operands[2];
18499 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18500 operands[2] = constm1_rtx, op = and_optab;
18501 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18502 operands[2] = const0_rtx, op = ior_optab;
18503 else
18504 return false;
18506 else
18507 return false;
18509 orig_out = operands[0];
18510 tmp = gen_reg_rtx (mode);
18511 operands[0] = tmp;
18513 /* Recurse to get the constant loaded. */
18514 if (ix86_expand_int_movcc (operands) == 0)
18515 return false;
18517 /* Mask in the interesting variable. */
18518 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18519 OPTAB_WIDEN);
18520 if (!rtx_equal_p (out, orig_out))
18521 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18523 return true;
18527 * For comparison with above,
18529 * movl cf,dest
18530 * movl ct,tmp
18531 * cmpl op1,op2
18532 * cmovcc tmp,dest
18534 * Size 15.
18537 if (! nonimmediate_operand (operands[2], mode))
18538 operands[2] = force_reg (mode, operands[2]);
18539 if (! nonimmediate_operand (operands[3], mode))
18540 operands[3] = force_reg (mode, operands[3]);
18542 if (! register_operand (operands[2], VOIDmode)
18543 && (mode == QImode
18544 || ! register_operand (operands[3], VOIDmode)))
18545 operands[2] = force_reg (mode, operands[2]);
18547 if (mode == QImode
18548 && ! register_operand (operands[3], VOIDmode))
18549 operands[3] = force_reg (mode, operands[3]);
18551 emit_insn (compare_seq);
18552 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18553 gen_rtx_IF_THEN_ELSE (mode,
18554 compare_op, operands[2],
18555 operands[3])));
18556 return true;
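/* Illustrative example, not part of the original source (register choices
are arbitrary): for "x = (a < b) ? 7 : 3" the diff == 4 case above can
produce roughly
xorl %eax, %eax
cmpl %esi, %edi
setl %al
leal 3(,%eax,4), %eax
i.e. a setcc followed by a single lea, with no branch at all. */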
18559 /* Swap, force into registers, or otherwise massage the two operands
18560 to an sse comparison with a mask result. Thus we differ a bit from
18561 ix86_prepare_fp_compare_args which expects to produce a flags result.
18563 The DEST operand exists to help determine whether to commute commutative
18564 operators. The POP0/POP1 operands are updated in place. The new
18565 comparison code is returned, or UNKNOWN if not implementable. */
18567 static enum rtx_code
18568 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18569 rtx *pop0, rtx *pop1)
18571 rtx tmp;
18573 switch (code)
18575 case LTGT:
18576 case UNEQ:
18577 /* We have no LTGT as an operator. We could implement it with
18578 NE & ORDERED, but this requires an extra temporary. It's
18579 not clear that it's worth it. */
18580 return UNKNOWN;
18582 case LT:
18583 case LE:
18584 case UNGT:
18585 case UNGE:
18586 /* These are supported directly. */
18587 break;
18589 case EQ:
18590 case NE:
18591 case UNORDERED:
18592 case ORDERED:
18593 /* For commutative operators, try to canonicalize the destination
18594 operand to be first in the comparison - this helps reload to
18595 avoid extra moves. */
18596 if (!dest || !rtx_equal_p (dest, *pop1))
18597 break;
18598 /* FALLTHRU */
18600 case GE:
18601 case GT:
18602 case UNLE:
18603 case UNLT:
18604 /* These are not supported directly. Swap the comparison operands
18605 to transform into something that is supported. */
18606 tmp = *pop0;
18607 *pop0 = *pop1;
18608 *pop1 = tmp;
18609 code = swap_condition (code);
18610 break;
18612 default:
18613 gcc_unreachable ();
18616 return code;
18619 /* Detect conditional moves that exactly match min/max operational
18620 semantics. Note that this is IEEE safe, as long as we don't
18621 interchange the operands.
18623 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18624 and TRUE if the operation is successful and instructions are emitted. */
18626 static bool
18627 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18628 rtx cmp_op1, rtx if_true, rtx if_false)
18630 enum machine_mode mode;
18631 bool is_min;
18632 rtx tmp;
18634 if (code == LT)
18636 else if (code == UNGE)
18638 tmp = if_true;
18639 if_true = if_false;
18640 if_false = tmp;
18642 else
18643 return false;
18645 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18646 is_min = true;
18647 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18648 is_min = false;
18649 else
18650 return false;
18652 mode = GET_MODE (dest);
18654 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18655 but MODE may be a vector mode and thus not appropriate. */
18656 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18658 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18659 rtvec v;
18661 if_true = force_reg (mode, if_true);
18662 v = gen_rtvec (2, if_true, if_false);
18663 tmp = gen_rtx_UNSPEC (mode, v, u);
18665 else
18667 code = is_min ? SMIN : SMAX;
18668 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18671 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
18672 return true;
18675 /* Expand an sse vector comparison. Return the register with the result. */
18677 static rtx
18678 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18679 rtx op_true, rtx op_false)
18681 enum machine_mode mode = GET_MODE (dest);
18682 rtx x;
18684 cmp_op0 = force_reg (mode, cmp_op0);
18685 if (!nonimmediate_operand (cmp_op1, mode))
18686 cmp_op1 = force_reg (mode, cmp_op1);
18688 if (optimize
18689 || reg_overlap_mentioned_p (dest, op_true)
18690 || reg_overlap_mentioned_p (dest, op_false))
18691 dest = gen_reg_rtx (mode);
18693 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18694 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18696 return dest;
18699 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18700 operations. This is used for both scalar and vector conditional moves. */
18702 static void
18703 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18705 enum machine_mode mode = GET_MODE (dest);
18706 rtx t2, t3, x;
18708 if (op_false == CONST0_RTX (mode))
18710 op_true = force_reg (mode, op_true);
18711 x = gen_rtx_AND (mode, cmp, op_true);
18712 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18714 else if (op_true == CONST0_RTX (mode))
18716 op_false = force_reg (mode, op_false);
18717 x = gen_rtx_NOT (mode, cmp);
18718 x = gen_rtx_AND (mode, x, op_false);
18719 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18721 else if (TARGET_XOP)
18723 rtx pcmov = gen_rtx_SET (mode, dest,
18724 gen_rtx_IF_THEN_ELSE (mode, cmp,
18725 op_true,
18726 op_false));
18727 emit_insn (pcmov);
18729 else
18731 op_true = force_reg (mode, op_true);
18732 op_false = force_reg (mode, op_false);
18734 t2 = gen_reg_rtx (mode);
18735 if (optimize)
18736 t3 = gen_reg_rtx (mode);
18737 else
18738 t3 = dest;
18740 x = gen_rtx_AND (mode, op_true, cmp);
18741 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18743 x = gen_rtx_NOT (mode, cmp);
18744 x = gen_rtx_AND (mode, x, op_false);
18745 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18747 x = gen_rtx_IOR (mode, t3, t2);
18748 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
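/* Illustrative note, not part of the original source: the general case
above is the classic SSE mask blend
t2 = cmp & op_true
t3 = ~cmp & op_false
dest = t3 | t2
which typically assembles to an andps/andnps/orps (or pand/pandn/por)
triple when neither arm is zero and XOP's vpcmov is unavailable. */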
18752 /* Expand a floating-point conditional move. Return true if successful. */
18754 bool
18755 ix86_expand_fp_movcc (rtx operands[])
18757 enum machine_mode mode = GET_MODE (operands[0]);
18758 enum rtx_code code = GET_CODE (operands[1]);
18759 rtx tmp, compare_op;
18760 rtx op0 = XEXP (operands[1], 0);
18761 rtx op1 = XEXP (operands[1], 1);
18763 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18765 enum machine_mode cmode;
18767 /* Since we have no cmove for SSE registers, don't force bad register
18768 allocation just to gain access to it. Deny movcc when the
18769 comparison mode doesn't match the move mode. */
18770 cmode = GET_MODE (op0);
18771 if (cmode == VOIDmode)
18772 cmode = GET_MODE (op1);
18773 if (cmode != mode)
18774 return false;
18776 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18777 if (code == UNKNOWN)
18778 return false;
18780 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18781 operands[2], operands[3]))
18782 return true;
18784 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18785 operands[2], operands[3]);
18786 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18787 return true;
18790 /* The floating point conditional move instructions don't directly
18791 support conditions resulting from a signed integer comparison. */
18793 compare_op = ix86_expand_compare (code, op0, op1);
18794 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18796 tmp = gen_reg_rtx (QImode);
18797 ix86_expand_setcc (tmp, code, op0, op1);
18799 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18802 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18803 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18804 operands[2], operands[3])));
18806 return true;
18809 /* Expand a floating-point vector conditional move; a vcond operation
18810 rather than a movcc operation. */
18812 bool
18813 ix86_expand_fp_vcond (rtx operands[])
18815 enum rtx_code code = GET_CODE (operands[3]);
18816 rtx cmp;
18818 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18819 &operands[4], &operands[5]);
18820 if (code == UNKNOWN)
18821 return false;
18823 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18824 operands[5], operands[1], operands[2]))
18825 return true;
18827 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18828 operands[1], operands[2]);
18829 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18830 return true;
18833 /* Expand a signed/unsigned integral vector conditional move. */
18835 bool
18836 ix86_expand_int_vcond (rtx operands[])
18838 enum machine_mode mode = GET_MODE (operands[0]);
18839 enum rtx_code code = GET_CODE (operands[3]);
18840 bool negate = false;
18841 rtx x, cop0, cop1;
18843 cop0 = operands[4];
18844 cop1 = operands[5];
18846 /* XOP supports all of the comparisons on all vector int types. */
18847 if (!TARGET_XOP)
18849 /* Canonicalize the comparison to EQ, GT, GTU. */
18850 switch (code)
18852 case EQ:
18853 case GT:
18854 case GTU:
18855 break;
18857 case NE:
18858 case LE:
18859 case LEU:
18860 code = reverse_condition (code);
18861 negate = true;
18862 break;
18864 case GE:
18865 case GEU:
18866 code = reverse_condition (code);
18867 negate = true;
18868 /* FALLTHRU */
18870 case LT:
18871 case LTU:
18872 code = swap_condition (code);
18873 x = cop0, cop0 = cop1, cop1 = x;
18874 break;
18876 default:
18877 gcc_unreachable ();
18880 /* Only SSE4.1/SSE4.2 supports V2DImode. */
18881 if (mode == V2DImode)
18883 switch (code)
18885 case EQ:
18886 /* SSE4.1 supports EQ. */
18887 if (!TARGET_SSE4_1)
18888 return false;
18889 break;
18891 case GT:
18892 case GTU:
18893 /* SSE4.2 supports GT/GTU. */
18894 if (!TARGET_SSE4_2)
18895 return false;
18896 break;
18898 default:
18899 gcc_unreachable ();
18903 /* Unsigned parallel compare is not supported by the hardware.
18904 Play some tricks to turn this into a signed comparison
18905 against 0. */
18906 if (code == GTU)
18908 cop0 = force_reg (mode, cop0);
18910 switch (mode)
18912 case V4SImode:
18913 case V2DImode:
18915 rtx t1, t2, mask;
18916 rtx (*gen_sub3) (rtx, rtx, rtx);
18918 /* Subtract (-(INT MAX) - 1) from both operands to make
18919 them signed. */
18920 mask = ix86_build_signbit_mask (mode, true, false);
18921 gen_sub3 = (mode == V4SImode
18922 ? gen_subv4si3 : gen_subv2di3);
18923 t1 = gen_reg_rtx (mode);
18924 emit_insn (gen_sub3 (t1, cop0, mask));
18926 t2 = gen_reg_rtx (mode);
18927 emit_insn (gen_sub3 (t2, cop1, mask));
18929 cop0 = t1;
18930 cop1 = t2;
18931 code = GT;
18933 break;
18935 case V16QImode:
18936 case V8HImode:
18937 /* Perform a parallel unsigned saturating subtraction. */
18938 x = gen_reg_rtx (mode);
18939 emit_insn (gen_rtx_SET (VOIDmode, x,
18940 gen_rtx_US_MINUS (mode, cop0, cop1)));
18942 cop0 = x;
18943 cop1 = CONST0_RTX (mode);
18944 code = EQ;
18945 negate = !negate;
18946 break;
18948 default:
18949 gcc_unreachable ();
18954 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
18955 operands[1+negate], operands[2-negate]);
18957 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
18958 operands[2-negate]);
18959 return true;
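/* Clarifying note, not part of the original source: the GTU trick above
relies on biasing by the sign-bit mask (x - 0x80000000, equivalently
x ^ 0x80000000 in wrap-around arithmetic, for SImode elements) being an
order-preserving map from unsigned to signed, so (gtu a b) becomes a
signed pcmpgt on the biased operands; the narrow modes use an unsigned
saturating subtraction instead, since (a - b) saturates to nonzero
exactly when a is unsigned-greater than b. */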
18962 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
18963 true if we should do zero extension, else sign extension. HIGH_P is
18964 true if we want the N/2 high elements, else the low elements. */
18966 void
18967 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18969 enum machine_mode imode = GET_MODE (operands[1]);
18970 rtx (*unpack)(rtx, rtx, rtx);
18971 rtx se, dest;
18973 switch (imode)
18975 case V16QImode:
18976 if (high_p)
18977 unpack = gen_vec_interleave_highv16qi;
18978 else
18979 unpack = gen_vec_interleave_lowv16qi;
18980 break;
18981 case V8HImode:
18982 if (high_p)
18983 unpack = gen_vec_interleave_highv8hi;
18984 else
18985 unpack = gen_vec_interleave_lowv8hi;
18986 break;
18987 case V4SImode:
18988 if (high_p)
18989 unpack = gen_vec_interleave_highv4si;
18990 else
18991 unpack = gen_vec_interleave_lowv4si;
18992 break;
18993 default:
18994 gcc_unreachable ();
18997 dest = gen_lowpart (imode, operands[0]);
18999 if (unsigned_p)
19000 se = force_reg (imode, CONST0_RTX (imode));
19001 else
19002 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
19003 operands[1], pc_rtx, pc_rtx);
19005 emit_insn (unpack (dest, operands[1], se));
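/* Clarifying note, not part of the original source: for the signed case
the "se" operand computed above is a per-element mask of the sign bits
(0 > x yields all-ones exactly for negative elements), so interleaving
the source with that mask is a widening sign extension, while
interleaving with zeros gives the zero extension. */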
19008 /* This function performs the same task as ix86_expand_sse_unpack,
19009 but with SSE4.1 instructions. */
19011 void
19012 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
19014 enum machine_mode imode = GET_MODE (operands[1]);
19015 rtx (*unpack)(rtx, rtx);
19016 rtx src, dest;
19018 switch (imode)
19020 case V16QImode:
19021 if (unsigned_p)
19022 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
19023 else
19024 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
19025 break;
19026 case V8HImode:
19027 if (unsigned_p)
19028 unpack = gen_sse4_1_zero_extendv4hiv4si2;
19029 else
19030 unpack = gen_sse4_1_sign_extendv4hiv4si2;
19031 break;
19032 case V4SImode:
19033 if (unsigned_p)
19034 unpack = gen_sse4_1_zero_extendv2siv2di2;
19035 else
19036 unpack = gen_sse4_1_sign_extendv2siv2di2;
19037 break;
19038 default:
19039 gcc_unreachable ();
19042 dest = operands[0];
19043 if (high_p)
19045 /* Shift higher 8 bytes to lower 8 bytes. */
19046 src = gen_reg_rtx (imode);
19047 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
19048 gen_lowpart (V1TImode, operands[1]),
19049 GEN_INT (64)));
19051 else
19052 src = operands[1];
19054 emit_insn (unpack (dest, src));
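/* Clarifying note, not part of the original source: the SSE4.1
pmovsx/pmovzx forms read only the low half of the source register,
which is why the high-part case first shifts the upper 8 bytes down
with a 64-bit logical right shift of the whole vector. */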
19057 /* Expand conditional increment or decrement using adc/sbb instructions.
19058 The default case using setcc followed by a conditional move can be
19059 done by generic code. */
19060 bool
19061 ix86_expand_int_addcc (rtx operands[])
19063 enum rtx_code code = GET_CODE (operands[1]);
19064 rtx flags;
19065 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
19066 rtx compare_op;
19067 rtx val = const0_rtx;
19068 bool fpcmp = false;
19069 enum machine_mode mode;
19070 rtx op0 = XEXP (operands[1], 0);
19071 rtx op1 = XEXP (operands[1], 1);
19073 if (operands[3] != const1_rtx
19074 && operands[3] != constm1_rtx)
19075 return false;
19076 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
19077 return false;
19078 code = GET_CODE (compare_op);
19080 flags = XEXP (compare_op, 0);
19082 if (GET_MODE (flags) == CCFPmode
19083 || GET_MODE (flags) == CCFPUmode)
19085 fpcmp = true;
19086 code = ix86_fp_compare_code_to_integer (code);
19089 if (code != LTU)
19091 val = constm1_rtx;
19092 if (fpcmp)
19093 PUT_CODE (compare_op,
19094 reverse_condition_maybe_unordered
19095 (GET_CODE (compare_op)));
19096 else
19097 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
19100 mode = GET_MODE (operands[0]);
19102 /* Construct either adc or sbb insn. */
19103 if ((code == LTU) == (operands[3] == constm1_rtx))
19105 switch (mode)
19107 case QImode:
19108 insn = gen_subqi3_carry;
19109 break;
19110 case HImode:
19111 insn = gen_subhi3_carry;
19112 break;
19113 case SImode:
19114 insn = gen_subsi3_carry;
19115 break;
19116 case DImode:
19117 insn = gen_subdi3_carry;
19118 break;
19119 default:
19120 gcc_unreachable ();
19123 else
19125 switch (mode)
19127 case QImode:
19128 insn = gen_addqi3_carry;
19129 break;
19130 case HImode:
19131 insn = gen_addhi3_carry;
19132 break;
19133 case SImode:
19134 insn = gen_addsi3_carry;
19135 break;
19136 case DImode:
19137 insn = gen_adddi3_carry;
19138 break;
19139 default:
19140 gcc_unreachable ();
19143 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
19145 return true;
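/* Illustrative example, not part of the original source (register choices
are arbitrary): this expander is what turns C like "x += (a < b)" for
unsigned a, b into
cmpl %ebx, %eax ; CF is set iff a < b
adcl $0, %ecx ; x += CF
and the mirrored "x -= (a < b)" into a cmp/sbb pair. */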
19149 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
19150 but works for floating point parameters and non-offsettable memories.
19151 For pushes, it returns just stack offsets; the values will be saved
19152 in the right order. At most four parts are generated. */
19154 static int
19155 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
19157 int size;
19159 if (!TARGET_64BIT)
19160 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
19161 else
19162 size = (GET_MODE_SIZE (mode) + 4) / 8;
19164 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
19165 gcc_assert (size >= 2 && size <= 4);
19167 /* Optimize constant pool references to immediates. This is used by fp
19168 moves, which force all constants to memory to allow combining. */
19169 if (MEM_P (operand) && MEM_READONLY_P (operand))
19171 rtx tmp = maybe_get_pool_constant (operand);
19172 if (tmp)
19173 operand = tmp;
19176 if (MEM_P (operand) && !offsettable_memref_p (operand))
19178 /* The only non-offsettable memories we handle are pushes. */
19179 int ok = push_operand (operand, VOIDmode);
19181 gcc_assert (ok);
19183 operand = copy_rtx (operand);
19184 PUT_MODE (operand, Pmode);
19185 parts[0] = parts[1] = parts[2] = parts[3] = operand;
19186 return size;
19189 if (GET_CODE (operand) == CONST_VECTOR)
19191 enum machine_mode imode = int_mode_for_mode (mode);
19192 /* Caution: if we looked through a constant pool memory above,
19193 the operand may actually have a different mode now. That's
19194 ok, since we want to pun this all the way back to an integer. */
19195 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
19196 gcc_assert (operand != NULL);
19197 mode = imode;
19200 if (!TARGET_64BIT)
19202 if (mode == DImode)
19203 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19204 else
19206 int i;
19208 if (REG_P (operand))
19210 gcc_assert (reload_completed);
19211 for (i = 0; i < size; i++)
19212 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
19214 else if (offsettable_memref_p (operand))
19216 operand = adjust_address (operand, SImode, 0);
19217 parts[0] = operand;
19218 for (i = 1; i < size; i++)
19219 parts[i] = adjust_address (operand, SImode, 4 * i);
19221 else if (GET_CODE (operand) == CONST_DOUBLE)
19223 REAL_VALUE_TYPE r;
19224 long l[4];
19226 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19227 switch (mode)
19229 case TFmode:
19230 real_to_target (l, &r, mode);
19231 parts[3] = gen_int_mode (l[3], SImode);
19232 parts[2] = gen_int_mode (l[2], SImode);
19233 break;
19234 case XFmode:
19235 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
19236 parts[2] = gen_int_mode (l[2], SImode);
19237 break;
19238 case DFmode:
19239 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
19240 break;
19241 default:
19242 gcc_unreachable ();
19244 parts[1] = gen_int_mode (l[1], SImode);
19245 parts[0] = gen_int_mode (l[0], SImode);
19247 else
19248 gcc_unreachable ();
19251 else
19253 if (mode == TImode)
19254 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19255 if (mode == XFmode || mode == TFmode)
19257 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
19258 if (REG_P (operand))
19260 gcc_assert (reload_completed);
19261 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
19262 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
19264 else if (offsettable_memref_p (operand))
19266 operand = adjust_address (operand, DImode, 0);
19267 parts[0] = operand;
19268 parts[1] = adjust_address (operand, upper_mode, 8);
19270 else if (GET_CODE (operand) == CONST_DOUBLE)
19272 REAL_VALUE_TYPE r;
19273 long l[4];
19275 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19276 real_to_target (l, &r, mode);
19278 /* Do not use shift by 32 to avoid warning on 32bit systems. */
19279 if (HOST_BITS_PER_WIDE_INT >= 64)
19280 parts[0]
19281 = gen_int_mode
19282 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
19283 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
19284 DImode);
19285 else
19286 parts[0] = immed_double_const (l[0], l[1], DImode);
19288 if (upper_mode == SImode)
19289 parts[1] = gen_int_mode (l[2], SImode);
19290 else if (HOST_BITS_PER_WIDE_INT >= 64)
19291 parts[1]
19292 = gen_int_mode
19293 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
19294 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
19295 DImode);
19296 else
19297 parts[1] = immed_double_const (l[2], l[3], DImode);
19299 else
19300 gcc_unreachable ();
19304 return size;
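/* Illustrative example, not part of the original source: on a 32-bit
target a DFmode constant splits into two SImode immediates holding the
low and high words of its target representation, while a DImode hard
register splits into the two consecutive SImode registers; XFmode adds
a third part and 32-bit TFmode a fourth. */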
19307 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
19308 Return false when normal moves are needed; true when all required
19309 insns have been emitted. Operands 2-4 contain the input values
19310 in the correct order; operands 5-7 contain the output values. */
19312 void
19313 ix86_split_long_move (rtx operands[])
19315 rtx part[2][4];
19316 int nparts, i, j;
19317 int push = 0;
19318 int collisions = 0;
19319 enum machine_mode mode = GET_MODE (operands[0]);
19320 bool collisionparts[4];
19322 /* The DFmode expanders may ask us to move a double.
19323 For a 64-bit target this is a single move. By hiding the fact
19324 here we simplify the i386.md splitters. */
19325 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
19327 /* Optimize constant pool references to immediates. This is used by
19328 fp moves, which force all constants to memory to allow combining. */
19330 if (MEM_P (operands[1])
19331 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19332 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19333 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19334 if (push_operand (operands[0], VOIDmode))
19336 operands[0] = copy_rtx (operands[0]);
19337 PUT_MODE (operands[0], Pmode);
19339 else
19340 operands[0] = gen_lowpart (DImode, operands[0]);
19341 operands[1] = gen_lowpart (DImode, operands[1]);
19342 emit_move_insn (operands[0], operands[1]);
19343 return;
19346 /* The only non-offsettable memory we handle is push. */
19347 if (push_operand (operands[0], VOIDmode))
19348 push = 1;
19349 else
19350 gcc_assert (!MEM_P (operands[0])
19351 || offsettable_memref_p (operands[0]));
19353 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19354 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19356 /* When emitting a push, take care of source operands on the stack. */
19357 if (push && MEM_P (operands[1])
19358 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19360 rtx src_base = XEXP (part[1][nparts - 1], 0);
19362 /* Compensate for the stack decrement by 4. */
19363 if (!TARGET_64BIT && nparts == 3
19364 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19365 src_base = plus_constant (src_base, 4);
19367 /* src_base refers to the stack pointer and is
19368 automatically decreased by emitted push. */
19369 for (i = 0; i < nparts; i++)
19370 part[1][i] = change_address (part[1][i],
19371 GET_MODE (part[1][i]), src_base);
19374 /* We need to do the copy in the right order in case an address register
19375 of the source overlaps the destination. */
19376 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19378 rtx tmp;
19380 for (i = 0; i < nparts; i++)
19382 collisionparts[i]
19383 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19384 if (collisionparts[i])
19385 collisions++;
19388 /* Collision in the middle part can be handled by reordering. */
19389 if (collisions == 1 && nparts == 3 && collisionparts [1])
19391 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19392 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19394 else if (collisions == 1
19395 && nparts == 4
19396 && (collisionparts [1] || collisionparts [2]))
19398 if (collisionparts [1])
19400 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19401 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19403 else
19405 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19406 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19410 /* If there are more collisions, we can't handle them by reordering.
19411 Do an lea into the last part and use only one colliding move. */
19412 else if (collisions > 1)
19414 rtx base;
19416 collisions = 1;
19418 base = part[0][nparts - 1];
19420 /* Handle the case when the last part isn't valid for lea.
19421 This happens in 64-bit mode when storing the 12-byte XFmode. */
19422 if (GET_MODE (base) != Pmode)
19423 base = gen_rtx_REG (Pmode, REGNO (base));
19425 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19426 part[1][0] = replace_equiv_address (part[1][0], base);
19427 for (i = 1; i < nparts; i++)
19429 tmp = plus_constant (base, UNITS_PER_WORD * i);
19430 part[1][i] = replace_equiv_address (part[1][i], tmp);
19435 if (push)
19437 if (!TARGET_64BIT)
19439 if (nparts == 3)
19441 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19442 emit_insn (gen_addsi3 (stack_pointer_rtx,
19443 stack_pointer_rtx, GEN_INT (-4)));
19444 emit_move_insn (part[0][2], part[1][2]);
19446 else if (nparts == 4)
19448 emit_move_insn (part[0][3], part[1][3]);
19449 emit_move_insn (part[0][2], part[1][2]);
19452 else
19454 /* In 64-bit mode we don't have a 32-bit push available. In case this is a
19455 register, it is OK - we will just use the larger counterpart. We also
19456 retype memory - these come from an attempt to avoid a REX prefix when
19457 moving the second half of a TFmode value. */
19458 if (GET_MODE (part[1][1]) == SImode)
19460 switch (GET_CODE (part[1][1]))
19462 case MEM:
19463 part[1][1] = adjust_address (part[1][1], DImode, 0);
19464 break;
19466 case REG:
19467 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19468 break;
19470 default:
19471 gcc_unreachable ();
19474 if (GET_MODE (part[1][0]) == SImode)
19475 part[1][0] = part[1][1];
19478 emit_move_insn (part[0][1], part[1][1]);
19479 emit_move_insn (part[0][0], part[1][0]);
19480 return;
19483 /* Choose the correct order so as not to overwrite the source before it is copied. */
19484 if ((REG_P (part[0][0])
19485 && REG_P (part[1][1])
19486 && (REGNO (part[0][0]) == REGNO (part[1][1])
19487 || (nparts == 3
19488 && REGNO (part[0][0]) == REGNO (part[1][2]))
19489 || (nparts == 4
19490 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19491 || (collisions > 0
19492 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19494 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19496 operands[2 + i] = part[0][j];
19497 operands[6 + i] = part[1][j];
19500 else
19502 for (i = 0; i < nparts; i++)
19504 operands[2 + i] = part[0][i];
19505 operands[6 + i] = part[1][i];
19509 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19510 if (optimize_insn_for_size_p ())
19512 for (j = 0; j < nparts - 1; j++)
19513 if (CONST_INT_P (operands[6 + j])
19514 && operands[6 + j] != const0_rtx
19515 && REG_P (operands[2 + j]))
19516 for (i = j; i < nparts - 1; i++)
19517 if (CONST_INT_P (operands[7 + i])
19518 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19519 operands[7 + i] = operands[2 + j];
19522 for (i = 0; i < nparts; i++)
19523 emit_move_insn (operands[2 + i], operands[6 + i]);
19525 return;
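/* For illustration only: on a 32-bit target the splitter above turns a
   DImode move into two independent SImode moves, low half first unless a
   collision or push forces the reverse order.  A minimal stand-alone C
   sketch of the resulting data movement (store_di_as_two_si is a
   hypothetical name, not part of GCC):  */

#include <stdint.h>

static void
store_di_as_two_si (uint32_t *dst, uint64_t val)
{
  uint32_t lo = (uint32_t) val;           /* part[1][0] */
  uint32_t hi = (uint32_t) (val >> 32);   /* part[1][1] */
  dst[0] = lo;   /* first SImode move  */
  dst[1] = hi;   /* second SImode move */
}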
19528 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19529 left shift by a constant, either using a single shift or
19530 a sequence of add instructions. */
19532 static void
19533 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19535 rtx (*insn)(rtx, rtx, rtx);
19537 if (count == 1
19538 || (count * ix86_cost->add <= ix86_cost->shift_const
19539 && !optimize_insn_for_size_p ()))
19541 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19542 while (count-- > 0)
19543 emit_insn (insn (operand, operand, operand));
19545 else
19547 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19548 emit_insn (insn (operand, operand, GEN_INT (count)));
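/* Illustration of the equivalence used above: adding a value to itself
   is a left shift by one, so a small constant shift can be open coded as
   COUNT additions when COUNT * add cost does not exceed the cost of one
   constant shift.  A minimal stand-alone sketch (ashl_by_adds is a
   hypothetical name):  */

static unsigned int
ashl_by_adds (unsigned int x, int count)
{
  while (count-- > 0)
    x += x;              /* x + x == x << 1 */
  return x;
}

/* e.g. ashl_by_adds (5, 3) == 5 << 3 == 40.  */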
19552 void
19553 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19555 rtx (*gen_ashl3)(rtx, rtx, rtx);
19556 rtx (*gen_shld)(rtx, rtx, rtx);
19557 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19559 rtx low[2], high[2];
19560 int count;
19562 if (CONST_INT_P (operands[2]))
19564 split_double_mode (mode, operands, 2, low, high);
19565 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19567 if (count >= half_width)
19569 emit_move_insn (high[0], low[1]);
19570 emit_move_insn (low[0], const0_rtx);
19572 if (count > half_width)
19573 ix86_expand_ashl_const (high[0], count - half_width, mode);
19575 else
19577 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19579 if (!rtx_equal_p (operands[0], operands[1]))
19580 emit_move_insn (operands[0], operands[1]);
19582 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19583 ix86_expand_ashl_const (low[0], count, mode);
19585 return;
19588 split_double_mode (mode, operands, 1, low, high);
19590 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19592 if (operands[1] == const1_rtx)
19594 /* Assuming we've chosen QImode-capable registers, 1 << N
19595 can be done with two 32/64-bit shifts, no branches, no cmoves. */
19596 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19598 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19600 ix86_expand_clear (low[0]);
19601 ix86_expand_clear (high[0]);
19602 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19604 d = gen_lowpart (QImode, low[0]);
19605 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19606 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19607 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19609 d = gen_lowpart (QImode, high[0]);
19610 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19611 s = gen_rtx_NE (QImode, flags, const0_rtx);
19612 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19615 /* Otherwise, we can get the same results by manually performing
19616 a bit extract operation on bit 5/6, and then performing the two
19617 shifts. The two methods of getting 0/1 into low/high are exactly
19618 the same size. Avoiding the shift in the bit extract case helps
19619 pentium4 a bit; no one else seems to care much either way. */
19620 else
19622 enum machine_mode half_mode;
19623 rtx (*gen_lshr3)(rtx, rtx, rtx);
19624 rtx (*gen_and3)(rtx, rtx, rtx);
19625 rtx (*gen_xor3)(rtx, rtx, rtx);
19626 HOST_WIDE_INT bits;
19627 rtx x;
19629 if (mode == DImode)
19631 half_mode = SImode;
19632 gen_lshr3 = gen_lshrsi3;
19633 gen_and3 = gen_andsi3;
19634 gen_xor3 = gen_xorsi3;
19635 bits = 5;
19637 else
19639 half_mode = DImode;
19640 gen_lshr3 = gen_lshrdi3;
19641 gen_and3 = gen_anddi3;
19642 gen_xor3 = gen_xordi3;
19643 bits = 6;
19646 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19647 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19648 else
19649 x = gen_lowpart (half_mode, operands[2]);
19650 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19652 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19653 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19654 emit_move_insn (low[0], high[0]);
19655 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19658 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19659 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
19660 return;
19663 if (operands[1] == constm1_rtx)
19665 /* For -1 << N, we can avoid the shld instruction, because we
19666 know that we're shifting 0...31/63 ones into a -1. */
19667 emit_move_insn (low[0], constm1_rtx);
19668 if (optimize_insn_for_size_p ())
19669 emit_move_insn (high[0], low[0]);
19670 else
19671 emit_move_insn (high[0], constm1_rtx);
19673 else
19675 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19677 if (!rtx_equal_p (operands[0], operands[1]))
19678 emit_move_insn (operands[0], operands[1]);
19680 split_double_mode (mode, operands, 1, low, high);
19681 emit_insn (gen_shld (high[0], low[0], operands[2]));
19684 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19686 if (TARGET_CMOVE && scratch)
19688 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19689 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19691 ix86_expand_clear (scratch);
19692 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19694 else
19696 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19697 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19699 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
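/* A plain C model of the variable-count double-word left shift emitted
   above, assuming 32-bit halves: shld merges low bits into the high
   half, shl shifts the low half, and the final adjustment moves the low
   half up and clears it when bit 5 of the count is set (the 32-bit
   hardware shifts only look at the low five bits of the count).
   Illustrative sketch only, not the rtl expander:  */

#include <stdint.h>

static void
shl64_model (uint32_t *lo, uint32_t *hi, unsigned int n)
{
  unsigned int c = n & 31;
  uint32_t new_hi = c ? (*hi << c) | (*lo >> (32 - c)) : *hi;  /* shld */
  uint32_t new_lo = *lo << c;                                  /* shl  */
  if (n & 32)                  /* x86_shift_adj step for counts >= 32 */
    {
      new_hi = new_lo;
      new_lo = 0;
    }
  *hi = new_hi;
  *lo = new_lo;
}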
19703 void
19704 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19706 rtx (*gen_ashr3)(rtx, rtx, rtx)
19707 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19708 rtx (*gen_shrd)(rtx, rtx, rtx);
19709 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19711 rtx low[2], high[2];
19712 int count;
19714 if (CONST_INT_P (operands[2]))
19716 split_double_mode (mode, operands, 2, low, high);
19717 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19719 if (count == GET_MODE_BITSIZE (mode) - 1)
19721 emit_move_insn (high[0], high[1]);
19722 emit_insn (gen_ashr3 (high[0], high[0],
19723 GEN_INT (half_width - 1)));
19724 emit_move_insn (low[0], high[0]);
19727 else if (count >= half_width)
19729 emit_move_insn (low[0], high[1]);
19730 emit_move_insn (high[0], low[0]);
19731 emit_insn (gen_ashr3 (high[0], high[0],
19732 GEN_INT (half_width - 1)));
19734 if (count > half_width)
19735 emit_insn (gen_ashr3 (low[0], low[0],
19736 GEN_INT (count - half_width)));
19738 else
19740 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19742 if (!rtx_equal_p (operands[0], operands[1]))
19743 emit_move_insn (operands[0], operands[1]);
19745 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19746 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19749 else
19751 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19753 if (!rtx_equal_p (operands[0], operands[1]))
19754 emit_move_insn (operands[0], operands[1]);
19756 split_double_mode (mode, operands, 1, low, high);
19758 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19759 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19761 if (TARGET_CMOVE && scratch)
19763 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19764 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19766 emit_move_insn (scratch, high[0]);
19767 emit_insn (gen_ashr3 (scratch, scratch,
19768 GEN_INT (half_width - 1)));
19769 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19770 scratch));
19772 else
19774 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19775 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19777 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19782 void
19783 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19785 rtx (*gen_lshr3)(rtx, rtx, rtx)
19786 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19787 rtx (*gen_shrd)(rtx, rtx, rtx);
19788 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19790 rtx low[2], high[2];
19791 int count;
19793 if (CONST_INT_P (operands[2]))
19795 split_double_mode (mode, operands, 2, low, high);
19796 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19798 if (count >= half_width)
19800 emit_move_insn (low[0], high[1]);
19801 ix86_expand_clear (high[0]);
19803 if (count > half_width)
19804 emit_insn (gen_lshr3 (low[0], low[0],
19805 GEN_INT (count - half_width)));
19807 else
19809 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19811 if (!rtx_equal_p (operands[0], operands[1]))
19812 emit_move_insn (operands[0], operands[1]);
19814 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19815 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19818 else
19820 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19822 if (!rtx_equal_p (operands[0], operands[1]))
19823 emit_move_insn (operands[0], operands[1]);
19825 split_double_mode (mode, operands, 1, low, high);
19827 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19828 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19830 if (TARGET_CMOVE && scratch)
19832 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19833 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19835 ix86_expand_clear (scratch);
19836 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19837 scratch));
19839 else
19841 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19842 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19844 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
19849 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
19850 static void
19851 predict_jump (int prob)
19853 rtx insn = get_last_insn ();
19854 gcc_assert (JUMP_P (insn));
19855 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
19858 /* Helper function for the string operations below. Test whether VARIABLE
19859 is aligned to VALUE bytes. If true, jump to the label. */
19860 static rtx
19861 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
19863 rtx label = gen_label_rtx ();
19864 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
19865 if (GET_MODE (variable) == DImode)
19866 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
19867 else
19868 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
19869 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
19870 1, label);
19871 if (epilogue)
19872 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19873 else
19874 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19875 return label;
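/* The control flow produced by the helper above, written as ordinary C
   for VALUE == 4: mask the tested bit and branch around the fix-up code
   when it is clear.  The caller emits the fix-up before the returned
   label.  Sketch only; copy_one_si is a hypothetical stand-in for that
   fix-up:  */

static void copy_one_si (void) { }   /* hypothetical fix-up step */

static void
aligntest_model (unsigned long variable)
{
  if ((variable & 4) == 0)            /* and; jz <label> */
    goto aligned;
  copy_one_si ();                     /* code emitted before emit_label */
 aligned:
  ;
}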
19878 /* Decrease COUNTREG by VALUE. */
19879 static void
19880 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
19882 rtx (*gen_add)(rtx, rtx, rtx)
19883 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
19885 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
19888 /* Zero extend the possibly SImode EXP to a Pmode register. */
19890 ix86_zero_extend_to_Pmode (rtx exp)
19892 rtx r;
19893 if (GET_MODE (exp) == VOIDmode)
19894 return force_reg (Pmode, exp);
19895 if (GET_MODE (exp) == Pmode)
19896 return copy_to_mode_reg (Pmode, exp);
19897 r = gen_reg_rtx (Pmode);
19898 emit_insn (gen_zero_extendsidi2 (r, exp));
19899 return r;
19902 /* Divide COUNTREG by SCALE. */
19903 static rtx
19904 scale_counter (rtx countreg, int scale)
19906 rtx sc;
19908 if (scale == 1)
19909 return countreg;
19910 if (CONST_INT_P (countreg))
19911 return GEN_INT (INTVAL (countreg) / scale);
19912 gcc_assert (REG_P (countreg));
19914 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
19915 GEN_INT (exact_log2 (scale)),
19916 NULL, 1, OPTAB_DIRECT);
19917 return sc;
19920 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
19921 DImode for constant loop counts. */
19923 static enum machine_mode
19924 counter_mode (rtx count_exp)
19926 if (GET_MODE (count_exp) != VOIDmode)
19927 return GET_MODE (count_exp);
19928 if (!CONST_INT_P (count_exp))
19929 return Pmode;
19930 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
19931 return DImode;
19932 return SImode;
19935 /* When SRCPTR is non-NULL, output a simple loop to move memory
19936 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
19937 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
19938 the equivalent loop to set memory to VALUE (assumed to be in MODE).
19940 The size is rounded down to a whole number of chunks moved at once.
19941 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
19944 static void
19945 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
19946 rtx destptr, rtx srcptr, rtx value,
19947 rtx count, enum machine_mode mode, int unroll,
19948 int expected_size)
19950 rtx out_label, top_label, iter, tmp;
19951 enum machine_mode iter_mode = counter_mode (count);
19952 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
19953 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
19954 rtx size;
19955 rtx x_addr;
19956 rtx y_addr;
19957 int i;
19959 top_label = gen_label_rtx ();
19960 out_label = gen_label_rtx ();
19961 iter = gen_reg_rtx (iter_mode);
19963 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
19964 NULL, 1, OPTAB_DIRECT);
19965 /* Those two should combine. */
19966 if (piece_size == const1_rtx)
19968 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
19969 true, out_label);
19970 predict_jump (REG_BR_PROB_BASE * 10 / 100);
19972 emit_move_insn (iter, const0_rtx);
19974 emit_label (top_label);
19976 tmp = convert_modes (Pmode, iter_mode, iter, true);
19977 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
19978 destmem = change_address (destmem, mode, x_addr);
19980 if (srcmem)
19982 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
19983 srcmem = change_address (srcmem, mode, y_addr);
19985 /* When unrolling for chips that reorder memory reads and writes,
19986 we can save registers by using a single temporary.
19987 Also, using 4 temporaries is overkill in 32-bit mode. */
19988 if (!TARGET_64BIT && 0)
19990 for (i = 0; i < unroll; i++)
19992 if (i)
19994 destmem =
19995 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19996 srcmem =
19997 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19999 emit_move_insn (destmem, srcmem);
20002 else
20004 rtx tmpreg[4];
20005 gcc_assert (unroll <= 4);
20006 for (i = 0; i < unroll; i++)
20008 tmpreg[i] = gen_reg_rtx (mode);
20009 if (i)
20011 srcmem =
20012 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20014 emit_move_insn (tmpreg[i], srcmem);
20016 for (i = 0; i < unroll; i++)
20018 if (i)
20020 destmem =
20021 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20023 emit_move_insn (destmem, tmpreg[i]);
20027 else
20028 for (i = 0; i < unroll; i++)
20030 if (i)
20031 destmem =
20032 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20033 emit_move_insn (destmem, value);
20036 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
20037 true, OPTAB_LIB_WIDEN);
20038 if (tmp != iter)
20039 emit_move_insn (iter, tmp);
20041 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
20042 true, top_label);
20043 if (expected_size != -1)
20045 expected_size /= GET_MODE_SIZE (mode) * unroll;
20046 if (expected_size == 0)
20047 predict_jump (0);
20048 else if (expected_size > REG_BR_PROB_BASE)
20049 predict_jump (REG_BR_PROB_BASE - 1);
20050 else
20051 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
20053 else
20054 predict_jump (REG_BR_PROB_BASE * 80 / 100);
20055 iter = ix86_zero_extend_to_Pmode (iter);
20056 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
20057 true, OPTAB_LIB_WIDEN);
20058 if (tmp != destptr)
20059 emit_move_insn (destptr, tmp);
20060 if (srcptr)
20062 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
20063 true, OPTAB_LIB_WIDEN);
20064 if (tmp != srcptr)
20065 emit_move_insn (srcptr, tmp);
20067 emit_label (out_label);
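/* Shape of the code the expander above emits for the copy case, written
   as plain C with a 4-byte mode and an unroll factor of 2.  The rtl loop
   is bottom tested and the callers guard against a zero-sized main loop;
   this is only an illustrative sketch (copy_loop_model is a hypothetical
   name):  */

#include <stddef.h>
#include <string.h>

static size_t
copy_loop_model (unsigned char *dst, const unsigned char *src, size_t count)
{
  size_t piece = 4 * 2;                 /* GET_MODE_SIZE (mode) * unroll */
  size_t size = count & ~(piece - 1);   /* round down to whole pieces    */
  size_t iter;

  for (iter = 0; iter < size; iter += piece)
    {
      memcpy (dst + iter, src + iter, 4);           /* 1st unrolled move */
      memcpy (dst + iter + 4, src + iter + 4, 4);   /* 2nd unrolled move */
    }
  return size;   /* destptr/srcptr advance by this much for the epilogue */
}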
20070 /* Output a "rep; mov" instruction.
20071 Arguments have the same meaning as for the previous function. */
20072 static void
20073 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
20074 rtx destptr, rtx srcptr,
20075 rtx count,
20076 enum machine_mode mode)
20078 rtx destexp;
20079 rtx srcexp;
20080 rtx countreg;
20082 /* If the size is known, it is shorter to use rep movs. */
20083 if (mode == QImode && CONST_INT_P (count)
20084 && !(INTVAL (count) & 3))
20085 mode = SImode;
20087 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20088 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20089 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
20090 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
20091 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20092 if (mode != QImode)
20094 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20095 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20096 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20097 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
20098 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20099 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
20101 else
20103 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20104 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
20106 if (CONST_INT_P (count))
20108 count = GEN_INT (INTVAL (count)
20109 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20110 destmem = shallow_copy_rtx (destmem);
20111 srcmem = shallow_copy_rtx (srcmem);
20112 set_mem_size (destmem, count);
20113 set_mem_size (srcmem, count);
20115 else
20117 if (MEM_SIZE (destmem))
20118 set_mem_size (destmem, NULL_RTX);
20119 if (MEM_SIZE (srcmem))
20120 set_mem_size (srcmem, NULL_RTX);
20122 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
20123 destexp, srcexp));
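/* In C terms, the "rep; movs" expansion above scales the byte count by
   the chunk size and lets the string instruction advance both pointers;
   the low (count % chunk) bytes are left for the epilogue.  Illustrative
   sketch for the SImode (movsd) case only:  */

#include <stddef.h>
#include <string.h>

static void
rep_movsd_model (unsigned char *dst, const unsigned char *src, size_t count)
{
  size_t chunks = count / 4;        /* scale_counter (count, 4)        */
  memcpy (dst, src, chunks * 4);    /* rep movsd copies chunks dwords  */
  /* edi and esi end up advanced by chunks * 4 bytes.  */
}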
20126 /* Output a "rep; stos" instruction.
20127 Arguments have the same meaning as for the previous function. */
20128 static void
20129 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
20130 rtx count, enum machine_mode mode,
20131 rtx orig_value)
20133 rtx destexp;
20134 rtx countreg;
20136 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20137 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20138 value = force_reg (mode, gen_lowpart (mode, value));
20139 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20140 if (mode != QImode)
20142 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20143 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20144 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20146 else
20147 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20148 if (orig_value == const0_rtx && CONST_INT_P (count))
20150 count = GEN_INT (INTVAL (count)
20151 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20152 destmem = shallow_copy_rtx (destmem);
20153 set_mem_size (destmem, count);
20155 else if (MEM_SIZE (destmem))
20156 set_mem_size (destmem, NULL_RTX);
20157 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
20160 static void
20161 emit_strmov (rtx destmem, rtx srcmem,
20162 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
20164 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
20165 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
20166 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20169 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
20170 static void
20171 expand_movmem_epilogue (rtx destmem, rtx srcmem,
20172 rtx destptr, rtx srcptr, rtx count, int max_size)
20174 rtx src, dest;
20175 if (CONST_INT_P (count))
20177 HOST_WIDE_INT countval = INTVAL (count);
20178 int offset = 0;
20180 if ((countval & 0x10) && max_size > 16)
20182 if (TARGET_64BIT)
20184 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20185 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
20187 else
20188 gcc_unreachable ();
20189 offset += 16;
20191 if ((countval & 0x08) && max_size > 8)
20193 if (TARGET_64BIT)
20194 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20195 else
20197 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20198 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
20200 offset += 8;
20202 if ((countval & 0x04) && max_size > 4)
20204 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20205 offset += 4;
20207 if ((countval & 0x02) && max_size > 2)
20209 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
20210 offset += 2;
20212 if ((countval & 0x01) && max_size > 1)
20214 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
20215 offset += 1;
20217 return;
20219 if (max_size > 8)
20221 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
20222 count, 1, OPTAB_DIRECT);
20223 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
20224 count, QImode, 1, 4);
20225 return;
20228 /* When single stringop insns are available, we can cheaply advance the dest
20229 and src pointers. Otherwise we save code size by maintaining an offset
20230 (zero is readily available from the preceding rep operation) and using x86 addressing modes.
20232 if (TARGET_SINGLE_STRINGOP)
20234 if (max_size > 4)
20236 rtx label = ix86_expand_aligntest (count, 4, true);
20237 src = change_address (srcmem, SImode, srcptr);
20238 dest = change_address (destmem, SImode, destptr);
20239 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20240 emit_label (label);
20241 LABEL_NUSES (label) = 1;
20243 if (max_size > 2)
20245 rtx label = ix86_expand_aligntest (count, 2, true);
20246 src = change_address (srcmem, HImode, srcptr);
20247 dest = change_address (destmem, HImode, destptr);
20248 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20249 emit_label (label);
20250 LABEL_NUSES (label) = 1;
20252 if (max_size > 1)
20254 rtx label = ix86_expand_aligntest (count, 1, true);
20255 src = change_address (srcmem, QImode, srcptr);
20256 dest = change_address (destmem, QImode, destptr);
20257 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20258 emit_label (label);
20259 LABEL_NUSES (label) = 1;
20262 else
20264 rtx offset = force_reg (Pmode, const0_rtx);
20265 rtx tmp;
20267 if (max_size > 4)
20269 rtx label = ix86_expand_aligntest (count, 4, true);
20270 src = change_address (srcmem, SImode, srcptr);
20271 dest = change_address (destmem, SImode, destptr);
20272 emit_move_insn (dest, src);
20273 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
20274 true, OPTAB_LIB_WIDEN);
20275 if (tmp != offset)
20276 emit_move_insn (offset, tmp);
20277 emit_label (label);
20278 LABEL_NUSES (label) = 1;
20280 if (max_size > 2)
20282 rtx label = ix86_expand_aligntest (count, 2, true);
20283 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20284 src = change_address (srcmem, HImode, tmp);
20285 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20286 dest = change_address (destmem, HImode, tmp);
20287 emit_move_insn (dest, src);
20288 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
20289 true, OPTAB_LIB_WIDEN);
20290 if (tmp != offset)
20291 emit_move_insn (offset, tmp);
20292 emit_label (label);
20293 LABEL_NUSES (label) = 1;
20295 if (max_size > 1)
20297 rtx label = ix86_expand_aligntest (count, 1, true);
20298 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20299 src = change_address (srcmem, QImode, tmp);
20300 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20301 dest = change_address (destmem, QImode, tmp);
20302 emit_move_insn (dest, src);
20303 emit_label (label);
20304 LABEL_NUSES (label) = 1;
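/* Worked example of the constant-count branch above: 13 leftover bytes
   (binary 1101) become one 8-byte, one 4-byte and one 1-byte move, each
   guarded by a bit of the count.  A minimal C sketch of the same
   decomposition (tail_copy_model is a hypothetical name):  */

#include <stddef.h>
#include <string.h>

static void
tail_copy_model (unsigned char *dst, const unsigned char *src, size_t count)
{
  size_t off = 0;
  if (count & 8) { memcpy (dst + off, src + off, 8); off += 8; }
  if (count & 4) { memcpy (dst + off, src + off, 4); off += 4; }
  if (count & 2) { memcpy (dst + off, src + off, 2); off += 2; }
  if (count & 1) { memcpy (dst + off, src + off, 1); off += 1; }
}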
20309 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20310 static void
20311 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
20312 rtx count, int max_size)
20314 count =
20315 expand_simple_binop (counter_mode (count), AND, count,
20316 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
20317 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
20318 gen_lowpart (QImode, value), count, QImode,
20319 1, max_size / 2);
20322 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20323 static void
20324 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
20326 rtx dest;
20328 if (CONST_INT_P (count))
20330 HOST_WIDE_INT countval = INTVAL (count);
20331 int offset = 0;
20333 if ((countval & 0x10) && max_size > 16)
20335 if (TARGET_64BIT)
20337 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20338 emit_insn (gen_strset (destptr, dest, value));
20339 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
20340 emit_insn (gen_strset (destptr, dest, value));
20342 else
20343 gcc_unreachable ();
20344 offset += 16;
20346 if ((countval & 0x08) && max_size > 8)
20348 if (TARGET_64BIT)
20350 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20351 emit_insn (gen_strset (destptr, dest, value));
20353 else
20355 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20356 emit_insn (gen_strset (destptr, dest, value));
20357 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
20358 emit_insn (gen_strset (destptr, dest, value));
20360 offset += 8;
20362 if ((countval & 0x04) && max_size > 4)
20364 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20365 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20366 offset += 4;
20368 if ((countval & 0x02) && max_size > 2)
20370 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20371 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20372 offset += 2;
20374 if ((countval & 0x01) && max_size > 1)
20376 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20377 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20378 offset += 1;
20380 return;
20382 if (max_size > 32)
20384 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20385 return;
20387 if (max_size > 16)
20389 rtx label = ix86_expand_aligntest (count, 16, true);
20390 if (TARGET_64BIT)
20392 dest = change_address (destmem, DImode, destptr);
20393 emit_insn (gen_strset (destptr, dest, value));
20394 emit_insn (gen_strset (destptr, dest, value));
20396 else
20398 dest = change_address (destmem, SImode, destptr);
20399 emit_insn (gen_strset (destptr, dest, value));
20400 emit_insn (gen_strset (destptr, dest, value));
20401 emit_insn (gen_strset (destptr, dest, value));
20402 emit_insn (gen_strset (destptr, dest, value));
20404 emit_label (label);
20405 LABEL_NUSES (label) = 1;
20407 if (max_size > 8)
20409 rtx label = ix86_expand_aligntest (count, 8, true);
20410 if (TARGET_64BIT)
20412 dest = change_address (destmem, DImode, destptr);
20413 emit_insn (gen_strset (destptr, dest, value));
20415 else
20417 dest = change_address (destmem, SImode, destptr);
20418 emit_insn (gen_strset (destptr, dest, value));
20419 emit_insn (gen_strset (destptr, dest, value));
20421 emit_label (label);
20422 LABEL_NUSES (label) = 1;
20424 if (max_size > 4)
20426 rtx label = ix86_expand_aligntest (count, 4, true);
20427 dest = change_address (destmem, SImode, destptr);
20428 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20429 emit_label (label);
20430 LABEL_NUSES (label) = 1;
20432 if (max_size > 2)
20434 rtx label = ix86_expand_aligntest (count, 2, true);
20435 dest = change_address (destmem, HImode, destptr);
20436 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20437 emit_label (label);
20438 LABEL_NUSES (label) = 1;
20440 if (max_size > 1)
20442 rtx label = ix86_expand_aligntest (count, 1, true);
20443 dest = change_address (destmem, QImode, destptr);
20444 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20445 emit_label (label);
20446 LABEL_NUSES (label) = 1;
20450 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
20451 by ALIGN, to DESIRED_ALIGNMENT. */
20452 static void
20453 expand_movmem_prologue (rtx destmem, rtx srcmem,
20454 rtx destptr, rtx srcptr, rtx count,
20455 int align, int desired_alignment)
20457 if (align <= 1 && desired_alignment > 1)
20459 rtx label = ix86_expand_aligntest (destptr, 1, false);
20460 srcmem = change_address (srcmem, QImode, srcptr);
20461 destmem = change_address (destmem, QImode, destptr);
20462 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20463 ix86_adjust_counter (count, 1);
20464 emit_label (label);
20465 LABEL_NUSES (label) = 1;
20467 if (align <= 2 && desired_alignment > 2)
20469 rtx label = ix86_expand_aligntest (destptr, 2, false);
20470 srcmem = change_address (srcmem, HImode, srcptr);
20471 destmem = change_address (destmem, HImode, destptr);
20472 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20473 ix86_adjust_counter (count, 2);
20474 emit_label (label);
20475 LABEL_NUSES (label) = 1;
20477 if (align <= 4 && desired_alignment > 4)
20479 rtx label = ix86_expand_aligntest (destptr, 4, false);
20480 srcmem = change_address (srcmem, SImode, srcptr);
20481 destmem = change_address (destmem, SImode, destptr);
20482 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20483 ix86_adjust_counter (count, 4);
20484 emit_label (label);
20485 LABEL_NUSES (label) = 1;
20487 gcc_assert (desired_alignment <= 8);
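/* Worked example of the prologue above with ALIGN == 1 and
   DESIRED_ALIGNMENT == 8: for a destination address that is 5 mod 8, the
   1-byte test fires (address becomes 6 mod 8), the 2-byte test fires
   (address becomes 0 mod 8) and the 4-byte test is skipped, so 3 bytes
   are peeled off and COUNT is decremented accordingly.  A stand-alone C
   sketch of the same peeling (align_prologue_model is a hypothetical
   name):  */

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static size_t
align_prologue_model (unsigned char *dst, const unsigned char *src)
{
  size_t done = 0;
  if ((uintptr_t) dst & 1)
    { memcpy (dst + done, src + done, 1); done += 1; }
  if ((uintptr_t) (dst + done) & 2)
    { memcpy (dst + done, src + done, 2); done += 2; }
  if ((uintptr_t) (dst + done) & 4)
    { memcpy (dst + done, src + done, 4); done += 4; }
  return done;   /* the rtl version also decrements the count register */
}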
20490 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
20491 ALIGN_BYTES is how many bytes need to be copied. */
20492 static rtx
20493 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20494 int desired_align, int align_bytes)
20496 rtx src = *srcp;
20497 rtx src_size, dst_size;
20498 int off = 0;
20499 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20500 if (src_align_bytes >= 0)
20501 src_align_bytes = desired_align - src_align_bytes;
20502 src_size = MEM_SIZE (src);
20503 dst_size = MEM_SIZE (dst);
20504 if (align_bytes & 1)
20506 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20507 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20508 off = 1;
20509 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20511 if (align_bytes & 2)
20513 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20514 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20515 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20516 set_mem_align (dst, 2 * BITS_PER_UNIT);
20517 if (src_align_bytes >= 0
20518 && (src_align_bytes & 1) == (align_bytes & 1)
20519 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20520 set_mem_align (src, 2 * BITS_PER_UNIT);
20521 off = 2;
20522 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20524 if (align_bytes & 4)
20526 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20527 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20528 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20529 set_mem_align (dst, 4 * BITS_PER_UNIT);
20530 if (src_align_bytes >= 0)
20532 unsigned int src_align = 0;
20533 if ((src_align_bytes & 3) == (align_bytes & 3))
20534 src_align = 4;
20535 else if ((src_align_bytes & 1) == (align_bytes & 1))
20536 src_align = 2;
20537 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20538 set_mem_align (src, src_align * BITS_PER_UNIT);
20540 off = 4;
20541 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20543 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20544 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20545 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20546 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20547 if (src_align_bytes >= 0)
20549 unsigned int src_align = 0;
20550 if ((src_align_bytes & 7) == (align_bytes & 7))
20551 src_align = 8;
20552 else if ((src_align_bytes & 3) == (align_bytes & 3))
20553 src_align = 4;
20554 else if ((src_align_bytes & 1) == (align_bytes & 1))
20555 src_align = 2;
20556 if (src_align > (unsigned int) desired_align)
20557 src_align = desired_align;
20558 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20559 set_mem_align (src, src_align * BITS_PER_UNIT);
20561 if (dst_size)
20562 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20563 if (src_size)
20564 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
20565 *srcp = src;
20566 return dst;
20569 /* Store enough at DEST to align DEST, known to be aligned by ALIGN,
20570 to DESIRED_ALIGNMENT. */
20571 static void
20572 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20573 int align, int desired_alignment)
20575 if (align <= 1 && desired_alignment > 1)
20577 rtx label = ix86_expand_aligntest (destptr, 1, false);
20578 destmem = change_address (destmem, QImode, destptr);
20579 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20580 ix86_adjust_counter (count, 1);
20581 emit_label (label);
20582 LABEL_NUSES (label) = 1;
20584 if (align <= 2 && desired_alignment > 2)
20586 rtx label = ix86_expand_aligntest (destptr, 2, false);
20587 destmem = change_address (destmem, HImode, destptr);
20588 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20589 ix86_adjust_counter (count, 2);
20590 emit_label (label);
20591 LABEL_NUSES (label) = 1;
20593 if (align <= 4 && desired_alignment > 4)
20595 rtx label = ix86_expand_aligntest (destptr, 4, false);
20596 destmem = change_address (destmem, SImode, destptr);
20597 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20598 ix86_adjust_counter (count, 4);
20599 emit_label (label);
20600 LABEL_NUSES (label) = 1;
20602 gcc_assert (desired_alignment <= 8);
20605 /* Store enough at DST to align DST, known to be aligned by ALIGN, to
20606 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
20607 static rtx
20608 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20609 int desired_align, int align_bytes)
20611 int off = 0;
20612 rtx dst_size = MEM_SIZE (dst);
20613 if (align_bytes & 1)
20615 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20616 off = 1;
20617 emit_insn (gen_strset (destreg, dst,
20618 gen_lowpart (QImode, value)));
20620 if (align_bytes & 2)
20622 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20623 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20624 set_mem_align (dst, 2 * BITS_PER_UNIT);
20625 off = 2;
20626 emit_insn (gen_strset (destreg, dst,
20627 gen_lowpart (HImode, value)));
20629 if (align_bytes & 4)
20631 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20632 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20633 set_mem_align (dst, 4 * BITS_PER_UNIT);
20634 off = 4;
20635 emit_insn (gen_strset (destreg, dst,
20636 gen_lowpart (SImode, value)));
20638 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20639 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20640 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20641 if (dst_size)
20642 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20643 return dst;
20646 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20647 static enum stringop_alg
20648 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20649 int *dynamic_check)
20651 const struct stringop_algs * algs;
20652 bool optimize_for_speed;
20653 /* Algorithms using the rep prefix want at least edi and ecx;
20654 additionally, memset wants eax and memcpy wants esi. Don't
20655 consider such algorithms if the user has appropriated those
20656 registers for their own purposes. */
20657 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20658 || (memset
20659 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20661 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20662 || (alg != rep_prefix_1_byte \
20663 && alg != rep_prefix_4_byte \
20664 && alg != rep_prefix_8_byte))
20665 const struct processor_costs *cost;
20667 /* Even if the string operation call is cold, we still might spend a lot
20668 of time processing large blocks. */
20669 if (optimize_function_for_size_p (cfun)
20670 || (optimize_insn_for_size_p ()
20671 && expected_size != -1 && expected_size < 256))
20672 optimize_for_speed = false;
20673 else
20674 optimize_for_speed = true;
20676 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20678 *dynamic_check = -1;
20679 if (memset)
20680 algs = &cost->memset[TARGET_64BIT != 0];
20681 else
20682 algs = &cost->memcpy[TARGET_64BIT != 0];
20683 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
20684 return stringop_alg;
20685 /* rep; movq or rep; movl is the smallest variant. */
20686 else if (!optimize_for_speed)
20688 if (!count || (count & 3))
20689 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20690 else
20691 return rep_prefix_usable ? rep_prefix_4_byte : loop;
20693 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
20695 else if (expected_size != -1 && expected_size < 4)
20696 return loop_1_byte;
20697 else if (expected_size != -1)
20699 unsigned int i;
20700 enum stringop_alg alg = libcall;
20701 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20703 /* We get here if the algorithms that were not libcall-based
20704 were rep-prefix based and we are unable to use rep prefixes
20705 based on global register usage. Break out of the loop and
20706 use the heuristic below. */
20707 if (algs->size[i].max == 0)
20708 break;
20709 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20711 enum stringop_alg candidate = algs->size[i].alg;
20713 if (candidate != libcall && ALG_USABLE_P (candidate))
20714 alg = candidate;
20715 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20716 last non-libcall inline algorithm. */
20717 if (TARGET_INLINE_ALL_STRINGOPS)
20719 /* When the current size is best copied by a libcall, but we
20720 are still forced to inline, run the heuristic below
20721 that will pick code for medium-sized blocks. */
20722 if (alg != libcall)
20723 return alg;
20724 break;
20726 else if (ALG_USABLE_P (candidate))
20727 return candidate;
20730 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
20732 /* When asked to inline the call anyway, try to pick a meaningful choice.
20733 We look for the maximal size of block that is faster to copy by hand and
20734 take blocks of at most that size, guessing that the average size will
20735 be roughly half of the block.
20737 If this turns out to be bad, we might simply specify the preferred
20738 choice in ix86_costs. */
20739 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20740 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20742 int max = -1;
20743 enum stringop_alg alg;
20744 int i;
20745 bool any_alg_usable_p = true;
20747 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20749 enum stringop_alg candidate = algs->size[i].alg;
20750 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20752 if (candidate != libcall && candidate
20753 && ALG_USABLE_P (candidate))
20754 max = algs->size[i].max;
20756 /* If there aren't any usable algorithms, then recursing on
20757 smaller sizes isn't going to find anything. Just return the
20758 simple byte-at-a-time copy loop. */
20759 if (!any_alg_usable_p)
20761 /* Pick something reasonable. */
20762 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20763 *dynamic_check = 128;
20764 return loop_1_byte;
20766 if (max == -1)
20767 max = 4096;
20768 alg = decide_alg (count, max / 2, memset, dynamic_check);
20769 gcc_assert (*dynamic_check == -1);
20770 gcc_assert (alg != libcall);
20771 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20772 *dynamic_check = max;
20773 return alg;
20775 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20776 #undef ALG_USABLE_P
20779 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20780 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20781 static int
20782 decide_alignment (int align,
20783 enum stringop_alg alg,
20784 int expected_size)
20786 int desired_align = 0;
20787 switch (alg)
20789 case no_stringop:
20790 gcc_unreachable ();
20791 case loop:
20792 case unrolled_loop:
20793 desired_align = GET_MODE_SIZE (Pmode);
20794 break;
20795 case rep_prefix_8_byte:
20796 desired_align = 8;
20797 break;
20798 case rep_prefix_4_byte:
20799 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
20800 copying a whole cache line at once. */
20801 if (TARGET_PENTIUMPRO)
20802 desired_align = 8;
20803 else
20804 desired_align = 4;
20805 break;
20806 case rep_prefix_1_byte:
20807 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
20808 copying a whole cache line at once. */
20809 if (TARGET_PENTIUMPRO)
20810 desired_align = 8;
20811 else
20812 desired_align = 1;
20813 break;
20814 case loop_1_byte:
20815 desired_align = 1;
20816 break;
20817 case libcall:
20818 return 0;
20821 if (optimize_size)
20822 desired_align = 1;
20823 if (desired_align < align)
20824 desired_align = align;
20825 if (expected_size != -1 && expected_size < 4)
20826 desired_align = align;
20827 return desired_align;
20830 /* Return the smallest power of 2 greater than VAL. */
20831 static int
20832 smallest_pow2_greater_than (int val)
20834 int ret = 1;
20835 while (ret <= val)
20836 ret <<= 1;
20837 return ret;
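/* For reference: smallest_pow2_greater_than (0) == 1, (1) == 2,
   (7) == 8 and (8) == 16 -- the result is strictly greater than VAL,
   which is why ix86_expand_movmem below feeds it size_needed - 1 when
   rounding EPILOGUE_SIZE_NEEDED up to a power of two.  */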
20840 /* Expand a string move (memcpy) operation. Use i386 string operations
20841 when profitable. ix86_expand_setmem contains similar code. The code
20842 depends upon the architecture, block size and alignment, but always has
20843 the same overall structure:
20845 1) Prologue guard: a conditional that jumps to the epilogue for small
20846 blocks that can be handled by the epilogue alone. This is faster but
20847 also needed for correctness, since the prologue assumes the block is
20848 larger than the desired alignment.
20850 An optional dynamic check for size and a libcall for large
20851 blocks is emitted here too, with -minline-stringops-dynamically.
20853 2) Prologue: copy the first few bytes in order to get the destination
20854 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
20855 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
20856 We emit either a jump tree for power-of-two sized blocks, or a byte loop.
20858 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
20859 with the specified algorithm.
20861 4) Epilogue: code copying the tail of the block that is too small to be
20862 handled by the main body (or up to the size guarded by the prologue guard). */
20864 bool
20865 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
20866 rtx expected_align_exp, rtx expected_size_exp)
20868 rtx destreg;
20869 rtx srcreg;
20870 rtx label = NULL;
20871 rtx tmp;
20872 rtx jump_around_label = NULL;
20873 HOST_WIDE_INT align = 1;
20874 unsigned HOST_WIDE_INT count = 0;
20875 HOST_WIDE_INT expected_size = -1;
20876 int size_needed = 0, epilogue_size_needed;
20877 int desired_align = 0, align_bytes = 0;
20878 enum stringop_alg alg;
20879 int dynamic_check;
20880 bool need_zero_guard = false;
20882 if (CONST_INT_P (align_exp))
20883 align = INTVAL (align_exp);
20884 /* i386 can do misaligned access at a reasonably increased cost. */
20885 if (CONST_INT_P (expected_align_exp)
20886 && INTVAL (expected_align_exp) > align)
20887 align = INTVAL (expected_align_exp);
20888 /* ALIGN is the minimum of destination and source alignment, but we care here
20889 just about destination alignment. */
20890 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
20891 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
20893 if (CONST_INT_P (count_exp))
20894 count = expected_size = INTVAL (count_exp);
20895 if (CONST_INT_P (expected_size_exp) && count == 0)
20896 expected_size = INTVAL (expected_size_exp);
20898 /* Make sure we don't need to care about overflow later on. */
20899 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20900 return false;
20902 /* Step 0: Decide on preferred algorithm, desired alignment and
20903 size of chunks to be copied by main loop. */
20905 alg = decide_alg (count, expected_size, false, &dynamic_check);
20906 desired_align = decide_alignment (align, alg, expected_size);
20908 if (!TARGET_ALIGN_STRINGOPS)
20909 align = desired_align;
20911 if (alg == libcall)
20912 return false;
20913 gcc_assert (alg != no_stringop);
20914 if (!count)
20915 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
20916 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20917 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
20918 switch (alg)
20920 case libcall:
20921 case no_stringop:
20922 gcc_unreachable ();
20923 case loop:
20924 need_zero_guard = true;
20925 size_needed = GET_MODE_SIZE (Pmode);
20926 break;
20927 case unrolled_loop:
20928 need_zero_guard = true;
20929 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
20930 break;
20931 case rep_prefix_8_byte:
20932 size_needed = 8;
20933 break;
20934 case rep_prefix_4_byte:
20935 size_needed = 4;
20936 break;
20937 case rep_prefix_1_byte:
20938 size_needed = 1;
20939 break;
20940 case loop_1_byte:
20941 need_zero_guard = true;
20942 size_needed = 1;
20943 break;
20946 epilogue_size_needed = size_needed;
20948 /* Step 1: Prologue guard. */
20950 /* Alignment code needs count to be in register. */
20951 if (CONST_INT_P (count_exp) && desired_align > align)
20953 if (INTVAL (count_exp) > desired_align
20954 && INTVAL (count_exp) > size_needed)
20956 align_bytes
20957 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20958 if (align_bytes <= 0)
20959 align_bytes = 0;
20960 else
20961 align_bytes = desired_align - align_bytes;
20963 if (align_bytes == 0)
20964 count_exp = force_reg (counter_mode (count_exp), count_exp);
20966 gcc_assert (desired_align >= 1 && align >= 1);
20968 /* Ensure that alignment prologue won't copy past end of block. */
20969 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20971 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20972 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20973 Make sure it is a power of 2. */
20974 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20976 if (count)
20978 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20980 /* If main algorithm works on QImode, no epilogue is needed.
20981 For small sizes just don't align anything. */
20982 if (size_needed == 1)
20983 desired_align = align;
20984 else
20985 goto epilogue;
20988 else
20990 label = gen_label_rtx ();
20991 emit_cmp_and_jump_insns (count_exp,
20992 GEN_INT (epilogue_size_needed),
20993 LTU, 0, counter_mode (count_exp), 1, label);
20994 if (expected_size == -1 || expected_size < epilogue_size_needed)
20995 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20996 else
20997 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21001 /* Emit code to decide at runtime whether a library call or the inline
21002 expansion should be used. */
21003 if (dynamic_check != -1)
21005 if (CONST_INT_P (count_exp))
21007 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
21009 emit_block_move_via_libcall (dst, src, count_exp, false);
21010 count_exp = const0_rtx;
21011 goto epilogue;
21014 else
21016 rtx hot_label = gen_label_rtx ();
21017 jump_around_label = gen_label_rtx ();
21018 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21019 LEU, 0, GET_MODE (count_exp), 1, hot_label);
21020 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21021 emit_block_move_via_libcall (dst, src, count_exp, false);
21022 emit_jump (jump_around_label);
21023 emit_label (hot_label);
21027 /* Step 2: Alignment prologue. */
21029 if (desired_align > align)
21031 if (align_bytes == 0)
21033 /* Except for the first move in the epilogue, we no longer know
21034 the constant offset in the aliasing info. It doesn't seem worth
21035 the pain to maintain it for the first move, so throw away
21036 the info early. */
21037 src = change_address (src, BLKmode, srcreg);
21038 dst = change_address (dst, BLKmode, destreg);
21039 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
21040 desired_align);
21042 else
21044 /* If we know how many bytes need to be stored before dst is
21045 sufficiently aligned, maintain aliasing info accurately. */
21046 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
21047 desired_align, align_bytes);
21048 count_exp = plus_constant (count_exp, -align_bytes);
21049 count -= align_bytes;
21051 if (need_zero_guard
21052 && (count < (unsigned HOST_WIDE_INT) size_needed
21053 || (align_bytes == 0
21054 && count < ((unsigned HOST_WIDE_INT) size_needed
21055 + desired_align - align))))
21057 /* It is possible that we copied enough so the main loop will not
21058 execute. */
21059 gcc_assert (size_needed > 1);
21060 if (label == NULL_RTX)
21061 label = gen_label_rtx ();
21062 emit_cmp_and_jump_insns (count_exp,
21063 GEN_INT (size_needed),
21064 LTU, 0, counter_mode (count_exp), 1, label);
21065 if (expected_size == -1
21066 || expected_size < (desired_align - align) / 2 + size_needed)
21067 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21068 else
21069 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21072 if (label && size_needed == 1)
21074 emit_label (label);
21075 LABEL_NUSES (label) = 1;
21076 label = NULL;
21077 epilogue_size_needed = 1;
21079 else if (label == NULL_RTX)
21080 epilogue_size_needed = size_needed;
21082 /* Step 3: Main loop. */
21084 switch (alg)
21086 case libcall:
21087 case no_stringop:
21088 gcc_unreachable ();
21089 case loop_1_byte:
21090 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21091 count_exp, QImode, 1, expected_size);
21092 break;
21093 case loop:
21094 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21095 count_exp, Pmode, 1, expected_size);
21096 break;
21097 case unrolled_loop:
21098 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
21099 registers for 4 temporaries anyway. */
21100 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21101 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
21102 expected_size);
21103 break;
21104 case rep_prefix_8_byte:
21105 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21106 DImode);
21107 break;
21108 case rep_prefix_4_byte:
21109 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21110 SImode);
21111 break;
21112 case rep_prefix_1_byte:
21113 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21114 QImode);
21115 break;
21117 /* Properly adjust the offsets of the src and dest memory for aliasing. */
21118 if (CONST_INT_P (count_exp))
21120 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
21121 (count / size_needed) * size_needed);
21122 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21123 (count / size_needed) * size_needed);
21125 else
21127 src = change_address (src, BLKmode, srcreg);
21128 dst = change_address (dst, BLKmode, destreg);
21131 /* Step 4: Epilogue to copy the remaining bytes. */
21132 epilogue:
21133 if (label)
21135 /* When the main loop is done, COUNT_EXP might hold the original count,
21136 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21137 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21138 bytes. Compensate if needed. */
21140 if (size_needed < epilogue_size_needed)
21142 tmp =
21143 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21144 GEN_INT (size_needed - 1), count_exp, 1,
21145 OPTAB_DIRECT);
21146 if (tmp != count_exp)
21147 emit_move_insn (count_exp, tmp);
21149 emit_label (label);
21150 LABEL_NUSES (label) = 1;
21153 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21154 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
21155 epilogue_size_needed);
21156 if (jump_around_label)
21157 emit_label (jump_around_label);
21158 return true;
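/* Putting the four steps from the comment above ix86_expand_movmem
   together, the emitted code has roughly the following shape.  This is
   an illustrative, self-contained C paraphrase (8-byte chunks, 8-byte
   desired destination alignment), not the compiler's actual output:  */

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void
movmem_shape_model (unsigned char *dst, const unsigned char *src,
                    size_t count)
{
  const size_t chunk = 8;

  /* 1) Prologue guard: small blocks go straight to the epilogue.  */
  if (count >= chunk)
    {
      /* 2) Alignment prologue: peel bytes until dst is chunk aligned.  */
      while ((uintptr_t) dst % chunk)
        {
          *dst++ = *src++;
          count--;
        }
      /* 3) Main body: whole chunks, via a loop or rep movs.  */
      while (count >= chunk)
        {
          memcpy (dst, src, chunk);
          dst += chunk;
          src += chunk;
          count -= chunk;
        }
    }
  /* 4) Epilogue: the remaining tail of fewer than chunk bytes.  */
  while (count--)
    *dst++ = *src++;
}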
21161 /* Helper function for memset. For the QImode value 0xXY produce
21162 0xXYXYXYXY of the width specified by MODE. This is essentially
21163 a multiplication by 0x01010101, but we can do slightly better than
21164 synth_mult by unwinding the sequence by hand on CPUs with a
21165 slow multiply. */
21166 static rtx
21167 promote_duplicated_reg (enum machine_mode mode, rtx val)
21169 enum machine_mode valmode = GET_MODE (val);
21170 rtx tmp;
21171 int nops = mode == DImode ? 3 : 2;
21173 gcc_assert (mode == SImode || mode == DImode);
21174 if (val == const0_rtx)
21175 return copy_to_mode_reg (mode, const0_rtx);
21176 if (CONST_INT_P (val))
21178 HOST_WIDE_INT v = INTVAL (val) & 255;
21180 v |= v << 8;
21181 v |= v << 16;
21182 if (mode == DImode)
21183 v |= (v << 16) << 16;
21184 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
21187 if (valmode == VOIDmode)
21188 valmode = QImode;
21189 if (valmode != QImode)
21190 val = gen_lowpart (QImode, val);
21191 if (mode == QImode)
21192 return val;
21193 if (!TARGET_PARTIAL_REG_STALL)
21194 nops--;
21195 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
21196 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
21197 <= (ix86_cost->shift_const + ix86_cost->add) * nops
21198 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
21200 rtx reg = convert_modes (mode, QImode, val, true);
21201 tmp = promote_duplicated_reg (mode, const1_rtx);
21202 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
21203 OPTAB_DIRECT);
21205 else
21207 rtx reg = convert_modes (mode, QImode, val, true);
21209 if (!TARGET_PARTIAL_REG_STALL)
21210 if (mode == SImode)
21211 emit_insn (gen_movsi_insv_1 (reg, reg));
21212 else
21213 emit_insn (gen_movdi_insv_1 (reg, reg));
21214 else
21216 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
21217 NULL, 1, OPTAB_DIRECT);
21218 reg =
21219 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21221 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
21222 NULL, 1, OPTAB_DIRECT);
21223 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21224 if (mode == SImode)
21225 return reg;
21226 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
21227 NULL, 1, OPTAB_DIRECT);
21228 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21229 return reg;
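/* A minimal C sketch of the byte replication performed above, for the
   SImode case (illustrative only; the function name is made up and is
   not part of GCC). */
#if 0
static unsigned int
replicate_byte_si (unsigned char byte)
{
  unsigned int v = byte;	/* 0x000000XY */
  v |= v << 8;			/* 0x0000XYXY */
  v |= v << 16;			/* 0xXYXYXYXY */
  return v;			/* same value as byte * 0x01010101 */
}
#endif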
21233 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
21234 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
21235 alignment from ALIGN to DESIRED_ALIGN. */
21236 static rtx
21237 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
21239 rtx promoted_val;
21241 if (TARGET_64BIT
21242 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
21243 promoted_val = promote_duplicated_reg (DImode, val);
21244 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
21245 promoted_val = promote_duplicated_reg (SImode, val);
21246 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
21247 promoted_val = promote_duplicated_reg (HImode, val);
21248 else
21249 promoted_val = val;
21251 return promoted_val;
21254 /* Expand string clear operation (bzero). Use i386 string operations when
21255 profitable. See expand_movmem comment for explanation of individual
21256 steps performed. */
21257 bool
21258 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
21259 rtx expected_align_exp, rtx expected_size_exp)
21261 rtx destreg;
21262 rtx label = NULL;
21263 rtx tmp;
21264 rtx jump_around_label = NULL;
21265 HOST_WIDE_INT align = 1;
21266 unsigned HOST_WIDE_INT count = 0;
21267 HOST_WIDE_INT expected_size = -1;
21268 int size_needed = 0, epilogue_size_needed;
21269 int desired_align = 0, align_bytes = 0;
21270 enum stringop_alg alg;
21271 rtx promoted_val = NULL;
21272 bool force_loopy_epilogue = false;
21273 int dynamic_check;
21274 bool need_zero_guard = false;
21276 if (CONST_INT_P (align_exp))
21277 align = INTVAL (align_exp);
21278 /* i386 can do misaligned accesses at a reasonably small extra cost. */
21279 if (CONST_INT_P (expected_align_exp)
21280 && INTVAL (expected_align_exp) > align)
21281 align = INTVAL (expected_align_exp);
21282 if (CONST_INT_P (count_exp))
21283 count = expected_size = INTVAL (count_exp);
21284 if (CONST_INT_P (expected_size_exp) && count == 0)
21285 expected_size = INTVAL (expected_size_exp);
21287 /* Make sure we don't need to care about overflow later on. */
21288 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21289 return false;
21291 /* Step 0: Decide on preferred algorithm, desired alignment and
21292 size of chunks to be copied by main loop. */
21294 alg = decide_alg (count, expected_size, true, &dynamic_check);
21295 desired_align = decide_alignment (align, alg, expected_size);
21297 if (!TARGET_ALIGN_STRINGOPS)
21298 align = desired_align;
21300 if (alg == libcall)
21301 return false;
21302 gcc_assert (alg != no_stringop);
21303 if (!count)
21304 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
21305 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21306 switch (alg)
21308 case libcall:
21309 case no_stringop:
21310 gcc_unreachable ();
21311 case loop:
21312 need_zero_guard = true;
21313 size_needed = GET_MODE_SIZE (Pmode);
21314 break;
21315 case unrolled_loop:
21316 need_zero_guard = true;
21317 size_needed = GET_MODE_SIZE (Pmode) * 4;
21318 break;
21319 case rep_prefix_8_byte:
21320 size_needed = 8;
21321 break;
21322 case rep_prefix_4_byte:
21323 size_needed = 4;
21324 break;
21325 case rep_prefix_1_byte:
21326 size_needed = 1;
21327 break;
21328 case loop_1_byte:
21329 need_zero_guard = true;
21330 size_needed = 1;
21331 break;
21333 epilogue_size_needed = size_needed;
21335 /* Step 1: Prologue guard. */
21337 /* Alignment code needs count to be in register. */
21338 if (CONST_INT_P (count_exp) && desired_align > align)
21340 if (INTVAL (count_exp) > desired_align
21341 && INTVAL (count_exp) > size_needed)
21343 align_bytes
21344 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21345 if (align_bytes <= 0)
21346 align_bytes = 0;
21347 else
21348 align_bytes = desired_align - align_bytes;
21350 if (align_bytes == 0)
21352 enum machine_mode mode = SImode;
21353 if (TARGET_64BIT && (count & ~0xffffffff))
21354 mode = DImode;
21355 count_exp = force_reg (mode, count_exp);
21358 /* Do the cheap promotion to allow better CSE across the
21359 main loop and epilogue (i.e. one load of the big constant at the
21360 front of all the code). */
21361 if (CONST_INT_P (val_exp))
21362 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21363 desired_align, align);
21364 /* Ensure that alignment prologue won't copy past end of block. */
21365 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21367 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21368 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21369 Make sure it is a power of 2. */
21370 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21372 /* To improve performance of small blocks, we jump around the VAL
21373 promotion code. This means that if the promoted VAL is not constant,
21374 we might not use it in the epilogue and have to fall back to the byte
21375 loop variant. */
21376 if (epilogue_size_needed > 2 && !promoted_val)
21377 force_loopy_epilogue = true;
21378 if (count)
21380 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21382 /* If main algorithm works on QImode, no epilogue is needed.
21383 For small sizes just don't align anything. */
21384 if (size_needed == 1)
21385 desired_align = align;
21386 else
21387 goto epilogue;
21390 else
21392 label = gen_label_rtx ();
21393 emit_cmp_and_jump_insns (count_exp,
21394 GEN_INT (epilogue_size_needed),
21395 LTU, 0, counter_mode (count_exp), 1, label);
21396 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21397 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21398 else
21399 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21402 if (dynamic_check != -1)
21404 rtx hot_label = gen_label_rtx ();
21405 jump_around_label = gen_label_rtx ();
21406 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21407 LEU, 0, counter_mode (count_exp), 1, hot_label);
21408 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21409 set_storage_via_libcall (dst, count_exp, val_exp, false);
21410 emit_jump (jump_around_label);
21411 emit_label (hot_label);
21414 /* Step 2: Alignment prologue. */
21416 /* Do the expensive promotion once we branched off the small blocks. */
21417 if (!promoted_val)
21418 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21419 desired_align, align);
21420 gcc_assert (desired_align >= 1 && align >= 1);
21422 if (desired_align > align)
21424 if (align_bytes == 0)
21426 /* Except for the first move in the epilogue, we no longer know
21427 the constant offset in the aliasing info. It doesn't seem worth
21428 the pain to maintain it for the first move, so throw away
21429 the info early. */
21430 dst = change_address (dst, BLKmode, destreg);
21431 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21432 desired_align);
21434 else
21436 /* If we know how many bytes need to be stored before dst is
21437 sufficiently aligned, maintain aliasing info accurately. */
21438 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21439 desired_align, align_bytes);
21440 count_exp = plus_constant (count_exp, -align_bytes);
21441 count -= align_bytes;
21443 if (need_zero_guard
21444 && (count < (unsigned HOST_WIDE_INT) size_needed
21445 || (align_bytes == 0
21446 && count < ((unsigned HOST_WIDE_INT) size_needed
21447 + desired_align - align))))
21449 /* It is possible that we copied enough so the main loop will not
21450 execute. */
21451 gcc_assert (size_needed > 1);
21452 if (label == NULL_RTX)
21453 label = gen_label_rtx ();
21454 emit_cmp_and_jump_insns (count_exp,
21455 GEN_INT (size_needed),
21456 LTU, 0, counter_mode (count_exp), 1, label);
21457 if (expected_size == -1
21458 || expected_size < (desired_align - align) / 2 + size_needed)
21459 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21460 else
21461 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21464 if (label && size_needed == 1)
21466 emit_label (label);
21467 LABEL_NUSES (label) = 1;
21468 label = NULL;
21469 promoted_val = val_exp;
21470 epilogue_size_needed = 1;
21472 else if (label == NULL_RTX)
21473 epilogue_size_needed = size_needed;
21475 /* Step 3: Main loop. */
21477 switch (alg)
21479 case libcall:
21480 case no_stringop:
21481 gcc_unreachable ();
21482 case loop_1_byte:
21483 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21484 count_exp, QImode, 1, expected_size);
21485 break;
21486 case loop:
21487 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21488 count_exp, Pmode, 1, expected_size);
21489 break;
21490 case unrolled_loop:
21491 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21492 count_exp, Pmode, 4, expected_size);
21493 break;
21494 case rep_prefix_8_byte:
21495 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21496 DImode, val_exp);
21497 break;
21498 case rep_prefix_4_byte:
21499 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21500 SImode, val_exp);
21501 break;
21502 case rep_prefix_1_byte:
21503 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21504 QImode, val_exp);
21505 break;
21507 /* Properly adjust the offset of the destination memory for aliasing. */
21508 if (CONST_INT_P (count_exp))
21509 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21510 (count / size_needed) * size_needed);
21511 else
21512 dst = change_address (dst, BLKmode, destreg);
21514 /* Step 4: Epilogue to copy the remaining bytes. */
21516 if (label)
21518 /* When the main loop is done, COUNT_EXP might hold the original count,
21519 while we want to store only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21520 Epilogue code will actually store COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21521 bytes. Compensate if needed. */
21523 if (size_needed < epilogue_size_needed)
21525 tmp =
21526 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21527 GEN_INT (size_needed - 1), count_exp, 1,
21528 OPTAB_DIRECT);
21529 if (tmp != count_exp)
21530 emit_move_insn (count_exp, tmp);
21532 emit_label (label);
21533 LABEL_NUSES (label) = 1;
21535 epilogue:
21536 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21538 if (force_loopy_epilogue)
21539 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21540 epilogue_size_needed);
21541 else
21542 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21543 epilogue_size_needed);
21545 if (jump_around_label)
21546 emit_label (jump_around_label);
21547 return true;
21550 /* Expand the appropriate insns for doing strlen if not just doing
21551 repnz; scasb
21553 out = result, initialized with the start address
21554 align_rtx = alignment of the address.
21555 scratch = scratch register, initialized with the start address when
21556 not aligned, otherwise undefined
21558 This is just the body. It needs the initializations mentioned above and
21559 some address computation at the end. These things are done in i386.md. */
21561 static void
21562 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21564 int align;
21565 rtx tmp;
21566 rtx align_2_label = NULL_RTX;
21567 rtx align_3_label = NULL_RTX;
21568 rtx align_4_label = gen_label_rtx ();
21569 rtx end_0_label = gen_label_rtx ();
21570 rtx mem;
21571 rtx tmpreg = gen_reg_rtx (SImode);
21572 rtx scratch = gen_reg_rtx (SImode);
21573 rtx cmp;
21575 align = 0;
21576 if (CONST_INT_P (align_rtx))
21577 align = INTVAL (align_rtx);
21579 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21581 /* Is there a known alignment and is it less than 4? */
21582 if (align < 4)
21584 rtx scratch1 = gen_reg_rtx (Pmode);
21585 emit_move_insn (scratch1, out);
21586 /* Is there a known alignment and is it not 2? */
21587 if (align != 2)
21589 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21590 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21592 /* Leave just the two lower bits. */
21593 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21594 NULL_RTX, 0, OPTAB_WIDEN);
21596 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21597 Pmode, 1, align_4_label);
21598 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21599 Pmode, 1, align_2_label);
21600 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21601 Pmode, 1, align_3_label);
21603 else
21605 /* Since the alignment is 2, we have to check 2 or 0 bytes;
21606 check whether it is aligned to a 4 byte boundary. */
21608 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21609 NULL_RTX, 0, OPTAB_WIDEN);
21611 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21612 Pmode, 1, align_4_label);
21615 mem = change_address (src, QImode, out);
21617 /* Now compare the bytes. */
21619 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
21620 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21621 QImode, 1, end_0_label);
21623 /* Increment the address. */
21624 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21626 /* Not needed with an alignment of 2 */
21627 if (align != 2)
21629 emit_label (align_2_label);
21631 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21632 end_0_label);
21634 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21636 emit_label (align_3_label);
21639 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21640 end_0_label);
21642 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21645 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
21646 align this loop; it only makes the program larger and does not help
21647 speed it up. */
21648 emit_label (align_4_label);
21650 mem = change_address (src, SImode, out);
21651 emit_move_insn (scratch, mem);
21652 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
21654 /* This formula yields a nonzero result iff one of the bytes is zero.
21655 This saves three branches inside the loop and many cycles. */
21657 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21658 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21659 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21660 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21661 gen_int_mode (0x80808080, SImode)));
21662 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21663 align_4_label);
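/* Hedged worked example of the zero-byte test above: for the word
   0x11003322 (one of its bytes is zero)
     0x11003322 - 0x01010101 = 0x0fff3221
     ~0x11003322             = 0xeeffccdd
     0x0fff3221 & 0xeeffccdd = 0x0eff0001
     0x0eff0001 & 0x80808080 = 0x00800000   nonzero -> stop looping
   whereas a word with no zero byte, e.g. 0x11223344, yields 0 and the
   loop above continues. */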
21665 if (TARGET_CMOVE)
21667 rtx reg = gen_reg_rtx (SImode);
21668 rtx reg2 = gen_reg_rtx (Pmode);
21669 emit_move_insn (reg, tmpreg);
21670 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21672 /* If zero is not in the first two bytes, move two bytes forward. */
21673 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21674 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21675 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21676 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21677 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21678 reg,
21679 tmpreg)));
21680 /* Emit lea manually to avoid clobbering of flags. */
21681 emit_insn (gen_rtx_SET (SImode, reg2,
21682 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21684 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21685 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21686 emit_insn (gen_rtx_SET (VOIDmode, out,
21687 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21688 reg2,
21689 out)));
21691 else
21693 rtx end_2_label = gen_label_rtx ();
21694 /* Is zero in the first two bytes? */
21696 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21697 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21698 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21699 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21700 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21701 pc_rtx);
21702 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21703 JUMP_LABEL (tmp) = end_2_label;
21705 /* Not in the first two. Move two bytes forward. */
21706 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21707 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21709 emit_label (end_2_label);
21713 /* Avoid branch in fixing the byte. */
21714 tmpreg = gen_lowpart (QImode, tmpreg);
21715 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21716 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21717 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21718 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21720 emit_label (end_0_label);
21723 /* Expand strlen. */
21725 bool
21726 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21728 rtx addr, scratch1, scratch2, scratch3, scratch4;
21730 /* The generic case of the strlen expander is long. Avoid expanding
21731 it unless TARGET_INLINE_ALL_STRINGOPS. */
21733 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21734 && !TARGET_INLINE_ALL_STRINGOPS
21735 && !optimize_insn_for_size_p ()
21736 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21737 return false;
21739 addr = force_reg (Pmode, XEXP (src, 0));
21740 scratch1 = gen_reg_rtx (Pmode);
21742 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21743 && !optimize_insn_for_size_p ())
21745 /* Well, it seems that some optimizer does not combine a call like
21746 foo(strlen(bar), strlen(bar));
21747 when the move and the subtraction are done here. It does calculate
21748 the length just once when these instructions are done inside
21749 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
21750 often used and I use one fewer register for the lifetime of
21751 output_strlen_unroll(), this is better. */
21753 emit_move_insn (out, addr);
21755 ix86_expand_strlensi_unroll_1 (out, src, align);
21757 /* strlensi_unroll_1 returns the address of the zero at the end of
21758 the string, like memchr(), so compute the length by subtracting
21759 the start address. */
21760 emit_insn (ix86_gen_sub3 (out, out, addr));
21762 else
21764 rtx unspec;
21766 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21767 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21768 return false;
21770 scratch2 = gen_reg_rtx (Pmode);
21771 scratch3 = gen_reg_rtx (Pmode);
21772 scratch4 = force_reg (Pmode, constm1_rtx);
21774 emit_move_insn (scratch3, addr);
21775 eoschar = force_reg (QImode, eoschar);
21777 src = replace_equiv_address_nv (src, scratch3);
21779 /* If .md starts supporting :P, this can be done in .md. */
21780 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21781 scratch4), UNSPEC_SCAS);
21782 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21783 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21784 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
21786 return true;
21789 /* For a given symbol (function), construct code to compute the address of its
21790 PLT entry in the large x86-64 PIC model. */
21792 construct_plt_address (rtx symbol)
21794 rtx tmp = gen_reg_rtx (Pmode);
21795 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21797 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21798 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21800 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
21801 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
21802 return tmp;
21806 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
21807 rtx callarg2,
21808 rtx pop, int sibcall)
21810 rtx use = NULL, call;
21812 if (pop == const0_rtx)
21813 pop = NULL;
21814 gcc_assert (!TARGET_64BIT || !pop);
21816 if (TARGET_MACHO && !TARGET_64BIT)
21818 #if TARGET_MACHO
21819 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21820 fnaddr = machopic_indirect_call_target (fnaddr);
21821 #endif
21823 else
21825 /* Static functions and indirect calls don't need the pic register. */
21826 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21827 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21828 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21829 use_reg (&use, pic_offset_table_rtx);
21832 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21834 rtx al = gen_rtx_REG (QImode, AX_REG);
21835 emit_move_insn (al, callarg2);
21836 use_reg (&use, al);
21839 if (ix86_cmodel == CM_LARGE_PIC
21840 && MEM_P (fnaddr)
21841 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21842 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
21843 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
21844 else if (sibcall
21845 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
21846 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
21848 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
21849 fnaddr = gen_rtx_MEM (QImode, fnaddr);
21852 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
21853 if (retval)
21854 call = gen_rtx_SET (VOIDmode, retval, call);
21855 if (pop)
21857 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
21858 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
21859 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
21861 if (TARGET_64BIT
21862 && ix86_cfun_abi () == MS_ABI
21863 && (!callarg2 || INTVAL (callarg2) != -2))
21865 /* We need to represent that SI and DI registers are clobbered
21866 by SYSV calls. */
21867 static int clobbered_registers[] = {
21868 XMM6_REG, XMM7_REG, XMM8_REG,
21869 XMM9_REG, XMM10_REG, XMM11_REG,
21870 XMM12_REG, XMM13_REG, XMM14_REG,
21871 XMM15_REG, SI_REG, DI_REG
21873 unsigned int i;
21874 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
21875 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
21876 UNSPEC_MS_TO_SYSV_CALL);
21878 vec[0] = call;
21879 vec[1] = unspec;
21880 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
21881 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
21882 ? TImode : DImode,
21883 gen_rtx_REG
21884 (SSE_REGNO_P (clobbered_registers[i])
21885 ? TImode : DImode,
21886 clobbered_registers[i]));
21888 call = gen_rtx_PARALLEL (VOIDmode,
21889 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
21890 + 2, vec));
21893 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
21894 if (TARGET_VZEROUPPER)
21896 rtx unspec;
21897 int avx256;
21899 if (cfun->machine->callee_pass_avx256_p)
21901 if (cfun->machine->callee_return_avx256_p)
21902 avx256 = callee_return_pass_avx256;
21903 else
21904 avx256 = callee_pass_avx256;
21906 else if (cfun->machine->callee_return_avx256_p)
21907 avx256 = callee_return_avx256;
21908 else
21909 avx256 = call_no_avx256;
21911 if (reload_completed)
21912 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
21913 else
21915 unspec = gen_rtx_UNSPEC (VOIDmode,
21916 gen_rtvec (1, GEN_INT (avx256)),
21917 UNSPEC_CALL_NEEDS_VZEROUPPER);
21918 call = gen_rtx_PARALLEL (VOIDmode,
21919 gen_rtvec (2, call, unspec));
21923 call = emit_call_insn (call);
21924 if (use)
21925 CALL_INSN_FUNCTION_USAGE (call) = use;
21927 return call;
21930 void
21931 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
21933 rtx call = XVECEXP (PATTERN (insn), 0, 0);
21934 emit_insn (gen_avx_vzeroupper (vzeroupper));
21935 emit_call_insn (call);
21938 /* Output the assembly for a call instruction. */
21940 const char *
21941 ix86_output_call_insn (rtx insn, rtx call_op, int addr_op)
21943 bool direct_p = constant_call_address_operand (call_op, Pmode);
21944 bool seh_nop_p = false;
21946 gcc_assert (addr_op == 0 || addr_op == 1);
21948 if (SIBLING_CALL_P (insn))
21950 if (direct_p)
21951 return addr_op ? "jmp\t%P1" : "jmp\t%P0";
21952 /* SEH epilogue detection requires the indirect branch case
21953 to include REX.W. */
21954 else if (TARGET_SEH)
21955 return addr_op ? "rex.W jmp %A1" : "rex.W jmp %A0";
21956 else
21957 return addr_op ? "jmp\t%A1" : "jmp\t%A0";
21960 /* SEH unwinding can require an extra nop to be emitted in several
21961 circumstances. Determine if we have one of those. */
21962 if (TARGET_SEH)
21964 rtx i;
21966 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
21968 /* If we get to another real insn, we don't need the nop. */
21969 if (INSN_P (i))
21970 break;
21972 /* If we get to the epilogue note, prevent a catch region from
21973 being adjacent to the standard epilogue sequence. If non-call
21974 exceptions are enabled, we'll have done this during epilogue emission. */
21975 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
21976 && !flag_non_call_exceptions
21977 && !can_throw_internal (insn))
21979 seh_nop_p = true;
21980 break;
21984 /* If we didn't find a real insn following the call, prevent the
21985 unwinder from looking into the next function. */
21986 if (i == NULL)
21987 seh_nop_p = true;
21990 if (direct_p)
21992 if (seh_nop_p)
21993 return addr_op ? "call\t%P1\n\tnop" : "call\t%P0\n\tnop";
21994 else
21995 return addr_op ? "call\t%P1" : "call\t%P0";
21997 else
21999 if (seh_nop_p)
22000 return addr_op ? "call\t%A1\n\tnop" : "call\t%A0\n\tnop";
22001 else
22002 return addr_op ? "call\t%A1" : "call\t%A0";
22006 /* Clear stack slot assignments remembered from previous functions.
22007 This is called from INIT_EXPANDERS once before RTL is emitted for each
22008 function. */
22010 static struct machine_function *
22011 ix86_init_machine_status (void)
22013 struct machine_function *f;
22015 f = ggc_alloc_cleared_machine_function ();
22016 f->use_fast_prologue_epilogue_nregs = -1;
22017 f->tls_descriptor_call_expanded_p = 0;
22018 f->call_abi = ix86_abi;
22020 return f;
22023 /* Return a MEM corresponding to a stack slot with mode MODE.
22024 Allocate a new slot if necessary.
22026 The RTL for a function can have several slots available: N is
22027 which slot to use. */
22030 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
22032 struct stack_local_entry *s;
22034 gcc_assert (n < MAX_386_STACK_LOCALS);
22036 /* Virtual slot is valid only before vregs are instantiated. */
22037 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
22039 for (s = ix86_stack_locals; s; s = s->next)
22040 if (s->mode == mode && s->n == n)
22041 return copy_rtx (s->rtl);
22043 s = ggc_alloc_stack_local_entry ();
22044 s->n = n;
22045 s->mode = mode;
22046 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
22048 s->next = ix86_stack_locals;
22049 ix86_stack_locals = s;
22050 return s->rtl;
22053 /* Construct the SYMBOL_REF for the tls_get_addr function. */
22055 static GTY(()) rtx ix86_tls_symbol;
22057 ix86_tls_get_addr (void)
22060 if (!ix86_tls_symbol)
22062 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
22063 (TARGET_ANY_GNU_TLS
22064 && !TARGET_64BIT)
22065 ? "___tls_get_addr"
22066 : "__tls_get_addr");
22069 return ix86_tls_symbol;
22072 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
22074 static GTY(()) rtx ix86_tls_module_base_symbol;
22076 ix86_tls_module_base (void)
22079 if (!ix86_tls_module_base_symbol)
22081 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
22082 "_TLS_MODULE_BASE_");
22083 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
22084 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
22087 return ix86_tls_module_base_symbol;
22090 /* Calculate the length of the memory address in the instruction
22091 encoding. Does not include the one-byte modrm, opcode, or prefix. */
22094 memory_address_length (rtx addr)
22096 struct ix86_address parts;
22097 rtx base, index, disp;
22098 int len;
22099 int ok;
22101 if (GET_CODE (addr) == PRE_DEC
22102 || GET_CODE (addr) == POST_INC
22103 || GET_CODE (addr) == PRE_MODIFY
22104 || GET_CODE (addr) == POST_MODIFY)
22105 return 0;
22107 ok = ix86_decompose_address (addr, &parts);
22108 gcc_assert (ok);
22110 if (parts.base && GET_CODE (parts.base) == SUBREG)
22111 parts.base = SUBREG_REG (parts.base);
22112 if (parts.index && GET_CODE (parts.index) == SUBREG)
22113 parts.index = SUBREG_REG (parts.index);
22115 base = parts.base;
22116 index = parts.index;
22117 disp = parts.disp;
22118 len = 0;
22120 /* Rule of thumb:
22121 - esp as the base always wants an index,
22122 - ebp as the base always wants a displacement,
22123 - r12 as the base always wants an index,
22124 - r13 as the base always wants a displacement. */
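/* A few illustrative 32-bit examples of the lengths computed below
   (hedged; taken from the usual x86 encoding rules, and excluding the
   modrm, opcode and prefix bytes as stated above):
     (%ecx)            -> 0   plain modrm
     (%esp)            -> 1   needs a SIB byte
     (%ebp)            -> 1   needs a disp8 of zero
     8(%eax)           -> 1   disp8
     0x12345678(%eax)  -> 4   disp32
     (%eax,%ebx,4)     -> 1   SIB byte  */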
22126 /* Register Indirect. */
22127 if (base && !index && !disp)
22129 /* esp (for its index) and ebp (for its displacement) need
22130 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
22131 code. */
22132 if (REG_P (addr)
22133 && (addr == arg_pointer_rtx
22134 || addr == frame_pointer_rtx
22135 || REGNO (addr) == SP_REG
22136 || REGNO (addr) == BP_REG
22137 || REGNO (addr) == R12_REG
22138 || REGNO (addr) == R13_REG))
22139 len = 1;
22142 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
22143 is not disp32, but disp32(%rip), so for plain disp32
22144 a SIB byte is needed, unless print_operand_address
22145 optimizes it into disp32(%rip) or (%rip) is implied
22146 by UNSPEC. */
22147 else if (disp && !base && !index)
22149 len = 4;
22150 if (TARGET_64BIT)
22152 rtx symbol = disp;
22154 if (GET_CODE (disp) == CONST)
22155 symbol = XEXP (disp, 0);
22156 if (GET_CODE (symbol) == PLUS
22157 && CONST_INT_P (XEXP (symbol, 1)))
22158 symbol = XEXP (symbol, 0);
22160 if (GET_CODE (symbol) != LABEL_REF
22161 && (GET_CODE (symbol) != SYMBOL_REF
22162 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
22163 && (GET_CODE (symbol) != UNSPEC
22164 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
22165 && XINT (symbol, 1) != UNSPEC_PCREL
22166 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
22167 len += 1;
22171 else
22173 /* Find the length of the displacement constant. */
22174 if (disp)
22176 if (base && satisfies_constraint_K (disp))
22177 len = 1;
22178 else
22179 len = 4;
22181 /* ebp always wants a displacement. Similarly r13. */
22182 else if (base && REG_P (base)
22183 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
22184 len = 1;
22186 /* An index requires the two-byte modrm form.... */
22187 if (index
22188 /* ...like esp (or r12), which always wants an index. */
22189 || base == arg_pointer_rtx
22190 || base == frame_pointer_rtx
22191 || (base && REG_P (base)
22192 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
22193 len += 1;
22196 switch (parts.seg)
22198 case SEG_FS:
22199 case SEG_GS:
22200 len += 1;
22201 break;
22202 default:
22203 break;
22206 return len;
22209 /* Compute default value for "length_immediate" attribute. When SHORTFORM
22210 is set, expect that the insn has an 8bit immediate alternative. */
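/* For instance (illustrative): with SHORTFORM set, an SImode insn with
   immediate 100 fits the sign-extended 8bit form and contributes 1 byte,
   while immediate 300 does not and contributes 4 bytes; without
   SHORTFORM an SImode immediate always contributes 4 bytes. */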
22212 ix86_attr_length_immediate_default (rtx insn, int shortform)
22214 int len = 0;
22215 int i;
22216 extract_insn_cached (insn);
22217 for (i = recog_data.n_operands - 1; i >= 0; --i)
22218 if (CONSTANT_P (recog_data.operand[i]))
22220 enum attr_mode mode = get_attr_mode (insn);
22222 gcc_assert (!len);
22223 if (shortform && CONST_INT_P (recog_data.operand[i]))
22225 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
22226 switch (mode)
22228 case MODE_QI:
22229 len = 1;
22230 continue;
22231 case MODE_HI:
22232 ival = trunc_int_for_mode (ival, HImode);
22233 break;
22234 case MODE_SI:
22235 ival = trunc_int_for_mode (ival, SImode);
22236 break;
22237 default:
22238 break;
22240 if (IN_RANGE (ival, -128, 127))
22242 len = 1;
22243 continue;
22246 switch (mode)
22248 case MODE_QI:
22249 len = 1;
22250 break;
22251 case MODE_HI:
22252 len = 2;
22253 break;
22254 case MODE_SI:
22255 len = 4;
22256 break;
22257 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
22258 case MODE_DI:
22259 len = 4;
22260 break;
22261 default:
22262 fatal_insn ("unknown insn mode", insn);
22265 return len;
22267 /* Compute default value for "length_address" attribute. */
22269 ix86_attr_length_address_default (rtx insn)
22271 int i;
22273 if (get_attr_type (insn) == TYPE_LEA)
22275 rtx set = PATTERN (insn), addr;
22277 if (GET_CODE (set) == PARALLEL)
22278 set = XVECEXP (set, 0, 0);
22280 gcc_assert (GET_CODE (set) == SET);
22282 addr = SET_SRC (set);
22283 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
22285 if (GET_CODE (addr) == ZERO_EXTEND)
22286 addr = XEXP (addr, 0);
22287 if (GET_CODE (addr) == SUBREG)
22288 addr = SUBREG_REG (addr);
22291 return memory_address_length (addr);
22294 extract_insn_cached (insn);
22295 for (i = recog_data.n_operands - 1; i >= 0; --i)
22296 if (MEM_P (recog_data.operand[i]))
22298 constrain_operands_cached (reload_completed);
22299 if (which_alternative != -1)
22301 const char *constraints = recog_data.constraints[i];
22302 int alt = which_alternative;
22304 while (*constraints == '=' || *constraints == '+')
22305 constraints++;
22306 while (alt-- > 0)
22307 while (*constraints++ != ',')
22309 /* Skip ignored operands. */
22310 if (*constraints == 'X')
22311 continue;
22313 return memory_address_length (XEXP (recog_data.operand[i], 0));
22315 return 0;
22318 /* Compute default value for "length_vex" attribute. It includes
22319 2 or 3 byte VEX prefix and 1 opcode byte. */
22322 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
22323 int has_vex_w)
22325 int i;
22327 /* Only the 0f opcode map can use the 2 byte VEX prefix, and the VEX.W
22328 bit requires the 3 byte VEX prefix. */
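/* Hedged examples from the VEX encoding rules: vaddps %xmm1, %xmm2, %xmm3
   (0f opcode map, no VEX.W, no extended registers) fits the 2 byte
   prefix, so the length is 2 + 1; an insn with a DImode general register
   operand (needing VEX.W) or with %r8-%r15 appearing in a memory operand
   needs the 3 byte prefix, giving 3 + 1. */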
22329 if (!has_0f_opcode || has_vex_w)
22330 return 3 + 1;
22332 /* We can always use 2 byte VEX prefix in 32bit. */
22333 if (!TARGET_64BIT)
22334 return 2 + 1;
22336 extract_insn_cached (insn);
22338 for (i = recog_data.n_operands - 1; i >= 0; --i)
22339 if (REG_P (recog_data.operand[i]))
22341 /* REX.W bit uses 3 byte VEX prefix. */
22342 if (GET_MODE (recog_data.operand[i]) == DImode
22343 && GENERAL_REG_P (recog_data.operand[i]))
22344 return 3 + 1;
22346 else
22348 /* REX.X or REX.B bits use 3 byte VEX prefix. */
22349 if (MEM_P (recog_data.operand[i])
22350 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
22351 return 3 + 1;
22354 return 2 + 1;
22357 /* Return the maximum number of instructions a cpu can issue. */
22359 static int
22360 ix86_issue_rate (void)
22362 switch (ix86_tune)
22364 case PROCESSOR_PENTIUM:
22365 case PROCESSOR_ATOM:
22366 case PROCESSOR_K6:
22367 return 2;
22369 case PROCESSOR_PENTIUMPRO:
22370 case PROCESSOR_PENTIUM4:
22371 case PROCESSOR_CORE2_32:
22372 case PROCESSOR_CORE2_64:
22373 case PROCESSOR_COREI7_32:
22374 case PROCESSOR_COREI7_64:
22375 case PROCESSOR_ATHLON:
22376 case PROCESSOR_K8:
22377 case PROCESSOR_AMDFAM10:
22378 case PROCESSOR_NOCONA:
22379 case PROCESSOR_GENERIC32:
22380 case PROCESSOR_GENERIC64:
22381 case PROCESSOR_BDVER1:
22382 case PROCESSOR_BTVER1:
22383 return 3;
22385 default:
22386 return 1;
22390 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
22391 by DEP_INSN and nothing else set by DEP_INSN. */
22393 static int
22394 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22396 rtx set, set2;
22398 /* Simplify the test for uninteresting insns. */
22399 if (insn_type != TYPE_SETCC
22400 && insn_type != TYPE_ICMOV
22401 && insn_type != TYPE_FCMOV
22402 && insn_type != TYPE_IBR)
22403 return 0;
22405 if ((set = single_set (dep_insn)) != 0)
22407 set = SET_DEST (set);
22408 set2 = NULL_RTX;
22410 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22411 && XVECLEN (PATTERN (dep_insn), 0) == 2
22412 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22413 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22415 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22416 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22418 else
22419 return 0;
22421 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22422 return 0;
22424 /* This test is true if the dependent insn reads the flags but
22425 not any other potentially set register. */
22426 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22427 return 0;
22429 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
22430 return 0;
22432 return 1;
22435 /* Return true iff USE_INSN has a memory address with operands set by
22436 SET_INSN. */
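/* A hedged example of the dependence this looks for (assembly is
   illustrative only):

     addl    $4, %eax          <- SET_INSN writes %eax
     movl    (%eax), %edx      <- USE_INSN's address uses %eax

   The address of USE_INSN's memory operand is modified by SET_INSN, so
   this returns true; on Pentium that is the AGI stall for which
   ix86_adjust_cost charges an extra cycle. */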
22438 bool
22439 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22441 int i;
22442 extract_insn_cached (use_insn);
22443 for (i = recog_data.n_operands - 1; i >= 0; --i)
22444 if (MEM_P (recog_data.operand[i]))
22446 rtx addr = XEXP (recog_data.operand[i], 0);
22447 return modified_in_p (addr, set_insn) != 0;
22449 return false;
22452 static int
22453 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22455 enum attr_type insn_type, dep_insn_type;
22456 enum attr_memory memory;
22457 rtx set, set2;
22458 int dep_insn_code_number;
22460 /* Anti and output dependencies have zero cost on all CPUs. */
22461 if (REG_NOTE_KIND (link) != 0)
22462 return 0;
22464 dep_insn_code_number = recog_memoized (dep_insn);
22466 /* If we can't recognize the insns, we can't really do anything. */
22467 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22468 return cost;
22470 insn_type = get_attr_type (insn);
22471 dep_insn_type = get_attr_type (dep_insn);
22473 switch (ix86_tune)
22475 case PROCESSOR_PENTIUM:
22476 /* Address Generation Interlock adds a cycle of latency. */
22477 if (insn_type == TYPE_LEA)
22479 rtx addr = PATTERN (insn);
22481 if (GET_CODE (addr) == PARALLEL)
22482 addr = XVECEXP (addr, 0, 0);
22484 gcc_assert (GET_CODE (addr) == SET);
22486 addr = SET_SRC (addr);
22487 if (modified_in_p (addr, dep_insn))
22488 cost += 1;
22490 else if (ix86_agi_dependent (dep_insn, insn))
22491 cost += 1;
22493 /* ??? Compares pair with jump/setcc. */
22494 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22495 cost = 0;
22497 /* Floating point stores require value to be ready one cycle earlier. */
22498 if (insn_type == TYPE_FMOV
22499 && get_attr_memory (insn) == MEMORY_STORE
22500 && !ix86_agi_dependent (dep_insn, insn))
22501 cost += 1;
22502 break;
22504 case PROCESSOR_PENTIUMPRO:
22505 memory = get_attr_memory (insn);
22507 /* INT->FP conversion is expensive. */
22508 if (get_attr_fp_int_src (dep_insn))
22509 cost += 5;
22511 /* There is one cycle extra latency between an FP op and a store. */
22512 if (insn_type == TYPE_FMOV
22513 && (set = single_set (dep_insn)) != NULL_RTX
22514 && (set2 = single_set (insn)) != NULL_RTX
22515 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22516 && MEM_P (SET_DEST (set2)))
22517 cost += 1;
22519 /* Show ability of reorder buffer to hide latency of load by executing
22520 in parallel with previous instruction in case
22521 previous instruction is not needed to compute the address. */
22522 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22523 && !ix86_agi_dependent (dep_insn, insn))
22525 /* Claim moves to take one cycle, as the core can issue one load
22526 at a time and the next load can start a cycle later. */
22527 if (dep_insn_type == TYPE_IMOV
22528 || dep_insn_type == TYPE_FMOV)
22529 cost = 1;
22530 else if (cost > 1)
22531 cost--;
22533 break;
22535 case PROCESSOR_K6:
22536 memory = get_attr_memory (insn);
22538 /* The esp dependency is resolved before the instruction is really
22539 finished. */
22540 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22541 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22542 return 1;
22544 /* INT->FP conversion is expensive. */
22545 if (get_attr_fp_int_src (dep_insn))
22546 cost += 5;
22548 /* Show ability of reorder buffer to hide latency of load by executing
22549 in parallel with previous instruction in case
22550 previous instruction is not needed to compute the address. */
22551 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22552 && !ix86_agi_dependent (dep_insn, insn))
22554 /* Claim moves to take one cycle, as the core can issue one load
22555 at a time and the next load can start a cycle later. */
22556 if (dep_insn_type == TYPE_IMOV
22557 || dep_insn_type == TYPE_FMOV)
22558 cost = 1;
22559 else if (cost > 2)
22560 cost -= 2;
22561 else
22562 cost = 1;
22564 break;
22566 case PROCESSOR_ATHLON:
22567 case PROCESSOR_K8:
22568 case PROCESSOR_AMDFAM10:
22569 case PROCESSOR_BDVER1:
22570 case PROCESSOR_BTVER1:
22571 case PROCESSOR_ATOM:
22572 case PROCESSOR_GENERIC32:
22573 case PROCESSOR_GENERIC64:
22574 memory = get_attr_memory (insn);
22576 /* Show ability of reorder buffer to hide latency of load by executing
22577 in parallel with previous instruction in case
22578 previous instruction is not needed to compute the address. */
22579 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22580 && !ix86_agi_dependent (dep_insn, insn))
22582 enum attr_unit unit = get_attr_unit (insn);
22583 int loadcost = 3;
22585 /* Because of the difference between the length of the integer and
22586 floating unit pipeline preparation stages, the memory operands
22587 for floating point are cheaper.
22589 ??? For Athlon the difference is most probably 2. */
22590 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22591 loadcost = 3;
22592 else
22593 loadcost = TARGET_ATHLON ? 2 : 0;
22595 if (cost >= loadcost)
22596 cost -= loadcost;
22597 else
22598 cost = 0;
22601 default:
22602 break;
22605 return cost;
22608 /* How many alternative schedules to try. This should be as wide as the
22609 scheduling freedom in the DFA, but no wider. Making this value too
22610 large results in extra work for the scheduler. */
22612 static int
22613 ia32_multipass_dfa_lookahead (void)
22615 switch (ix86_tune)
22617 case PROCESSOR_PENTIUM:
22618 return 2;
22620 case PROCESSOR_PENTIUMPRO:
22621 case PROCESSOR_K6:
22622 return 1;
22624 case PROCESSOR_CORE2_32:
22625 case PROCESSOR_CORE2_64:
22626 case PROCESSOR_COREI7_32:
22627 case PROCESSOR_COREI7_64:
22628 /* Generally, we want haifa-sched:max_issue() to look ahead as far
22629 as the number of instructions that can be executed in one cycle, i.e.,
22630 issue_rate. I wonder why tuning for many CPUs does not do this. */
22631 return ix86_issue_rate ();
22633 default:
22634 return 0;
22640 /* Model the decoder of Core 2/i7.
22641 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
22642 track the instruction fetch block boundaries and make sure that long
22643 (9+ byte) instructions are assigned to decoder D0. */
22645 /* Maximum length of an insn that can be handled by
22646 a secondary decoder unit. '8' for Core 2/i7. */
22647 static int core2i7_secondary_decoder_max_insn_size;
22649 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22650 '16' for Core 2/i7. */
22651 static int core2i7_ifetch_block_size;
22653 /* Maximum number of instructions decoder can handle per cycle.
22654 '6' for Core 2/i7. */
22655 static int core2i7_ifetch_block_max_insns;
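/* Worked example of the filtering below (illustrative only): with a
   16 byte ifetch block and a 6 insn limit, for ready insns of 7, 5 and
   6 bytes only the first two fit one block (12 bytes, 2 insns); the
   third would overflow the block since 12 + 6 > 16 and is masked out.
   A 9+ byte insn is additionally masked out whenever it is not the
   first insn of the group, because only decoder D0 can handle it. */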
22657 typedef struct ix86_first_cycle_multipass_data_ *
22658 ix86_first_cycle_multipass_data_t;
22659 typedef const struct ix86_first_cycle_multipass_data_ *
22660 const_ix86_first_cycle_multipass_data_t;
22662 /* A variable to store target state across calls to max_issue within
22663 one cycle. */
22664 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22665 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22667 /* Initialize DATA. */
22668 static void
22669 core2i7_first_cycle_multipass_init (void *_data)
22671 ix86_first_cycle_multipass_data_t data
22672 = (ix86_first_cycle_multipass_data_t) _data;
22674 data->ifetch_block_len = 0;
22675 data->ifetch_block_n_insns = 0;
22676 data->ready_try_change = NULL;
22677 data->ready_try_change_size = 0;
22680 /* Advancing the cycle; reset ifetch block counts. */
22681 static void
22682 core2i7_dfa_post_advance_cycle (void)
22684 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22686 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22688 data->ifetch_block_len = 0;
22689 data->ifetch_block_n_insns = 0;
22692 static int min_insn_size (rtx);
22694 /* Filter out insns from ready_try that the core will not be able to issue
22695 on the current cycle due to decoder restrictions. */
22696 static void
22697 core2i7_first_cycle_multipass_filter_ready_try
22698 (const_ix86_first_cycle_multipass_data_t data,
22699 char *ready_try, int n_ready, bool first_cycle_insn_p)
22701 while (n_ready--)
22703 rtx insn;
22704 int insn_size;
22706 if (ready_try[n_ready])
22707 continue;
22709 insn = get_ready_element (n_ready);
22710 insn_size = min_insn_size (insn);
22712 if (/* If this is too long an insn for a secondary decoder ... */
22713 (!first_cycle_insn_p
22714 && insn_size > core2i7_secondary_decoder_max_insn_size)
22715 /* ... or it would not fit into the ifetch block ... */
22716 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22717 /* ... or the decoder is full already ... */
22718 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22719 /* ... mask the insn out. */
22721 ready_try[n_ready] = 1;
22723 if (data->ready_try_change)
22724 SET_BIT (data->ready_try_change, n_ready);
22729 /* Prepare for a new round of multipass lookahead scheduling. */
22730 static void
22731 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22732 bool first_cycle_insn_p)
22734 ix86_first_cycle_multipass_data_t data
22735 = (ix86_first_cycle_multipass_data_t) _data;
22736 const_ix86_first_cycle_multipass_data_t prev_data
22737 = ix86_first_cycle_multipass_data;
22739 /* Restore the state from the end of the previous round. */
22740 data->ifetch_block_len = prev_data->ifetch_block_len;
22741 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22743 /* Filter instructions that cannot be issued on current cycle due to
22744 decoder restrictions. */
22745 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22746 first_cycle_insn_p);
22749 /* INSN is being issued in current solution. Account for its impact on
22750 the decoder model. */
22751 static void
22752 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22753 rtx insn, const void *_prev_data)
22755 ix86_first_cycle_multipass_data_t data
22756 = (ix86_first_cycle_multipass_data_t) _data;
22757 const_ix86_first_cycle_multipass_data_t prev_data
22758 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22760 int insn_size = min_insn_size (insn);
22762 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22763 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22764 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22765 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22767 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22768 if (!data->ready_try_change)
22770 data->ready_try_change = sbitmap_alloc (n_ready);
22771 data->ready_try_change_size = n_ready;
22773 else if (data->ready_try_change_size < n_ready)
22775 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22776 n_ready, 0);
22777 data->ready_try_change_size = n_ready;
22779 sbitmap_zero (data->ready_try_change);
22781 /* Filter out insns from ready_try that the core will not be able to issue
22782 on the current cycle due to decoder restrictions. */
22783 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22784 false);
22787 /* Revert the effect on ready_try. */
22788 static void
22789 core2i7_first_cycle_multipass_backtrack (const void *_data,
22790 char *ready_try,
22791 int n_ready ATTRIBUTE_UNUSED)
22793 const_ix86_first_cycle_multipass_data_t data
22794 = (const_ix86_first_cycle_multipass_data_t) _data;
22795 unsigned int i = 0;
22796 sbitmap_iterator sbi;
22798 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22799 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22801 ready_try[i] = 0;
22805 /* Save the result of multipass lookahead scheduling for the next round. */
22806 static void
22807 core2i7_first_cycle_multipass_end (const void *_data)
22809 const_ix86_first_cycle_multipass_data_t data
22810 = (const_ix86_first_cycle_multipass_data_t) _data;
22811 ix86_first_cycle_multipass_data_t next_data
22812 = ix86_first_cycle_multipass_data;
22814 if (data != NULL)
22816 next_data->ifetch_block_len = data->ifetch_block_len;
22817 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
22821 /* Deallocate target data. */
22822 static void
22823 core2i7_first_cycle_multipass_fini (void *_data)
22825 ix86_first_cycle_multipass_data_t data
22826 = (ix86_first_cycle_multipass_data_t) _data;
22828 if (data->ready_try_change)
22830 sbitmap_free (data->ready_try_change);
22831 data->ready_try_change = NULL;
22832 data->ready_try_change_size = 0;
22836 /* Prepare for scheduling pass. */
22837 static void
22838 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22839 int verbose ATTRIBUTE_UNUSED,
22840 int max_uid ATTRIBUTE_UNUSED)
22842 /* Install scheduling hooks for current CPU. Some of these hooks are used
22843 in time-critical parts of the scheduler, so we only set them up when
22844 they are actually used. */
22845 switch (ix86_tune)
22847 case PROCESSOR_CORE2_32:
22848 case PROCESSOR_CORE2_64:
22849 case PROCESSOR_COREI7_32:
22850 case PROCESSOR_COREI7_64:
22851 targetm.sched.dfa_post_advance_cycle
22852 = core2i7_dfa_post_advance_cycle;
22853 targetm.sched.first_cycle_multipass_init
22854 = core2i7_first_cycle_multipass_init;
22855 targetm.sched.first_cycle_multipass_begin
22856 = core2i7_first_cycle_multipass_begin;
22857 targetm.sched.first_cycle_multipass_issue
22858 = core2i7_first_cycle_multipass_issue;
22859 targetm.sched.first_cycle_multipass_backtrack
22860 = core2i7_first_cycle_multipass_backtrack;
22861 targetm.sched.first_cycle_multipass_end
22862 = core2i7_first_cycle_multipass_end;
22863 targetm.sched.first_cycle_multipass_fini
22864 = core2i7_first_cycle_multipass_fini;
22866 /* Set decoder parameters. */
22867 core2i7_secondary_decoder_max_insn_size = 8;
22868 core2i7_ifetch_block_size = 16;
22869 core2i7_ifetch_block_max_insns = 6;
22870 break;
22872 default:
22873 targetm.sched.dfa_post_advance_cycle = NULL;
22874 targetm.sched.first_cycle_multipass_init = NULL;
22875 targetm.sched.first_cycle_multipass_begin = NULL;
22876 targetm.sched.first_cycle_multipass_issue = NULL;
22877 targetm.sched.first_cycle_multipass_backtrack = NULL;
22878 targetm.sched.first_cycle_multipass_end = NULL;
22879 targetm.sched.first_cycle_multipass_fini = NULL;
22880 break;
22885 /* Compute the alignment given to a constant that is being placed in memory.
22886 EXP is the constant and ALIGN is the alignment that the object would
22887 ordinarily have.
22888 The value of this function is used instead of that alignment to align
22889 the object. */
22892 ix86_constant_alignment (tree exp, int align)
22894 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
22895 || TREE_CODE (exp) == INTEGER_CST)
22897 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
22898 return 64;
22899 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
22900 return 128;
22902 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
22903 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
22904 return BITS_PER_WORD;
22906 return align;
22909 /* Compute the alignment for a static variable.
22910 TYPE is the data type, and ALIGN is the alignment that
22911 the object would ordinarily have. The value of this function is used
22912 instead of that alignment to align the object. */
22915 ix86_data_alignment (tree type, int align)
22917 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
22919 if (AGGREGATE_TYPE_P (type)
22920 && TYPE_SIZE (type)
22921 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22922 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
22923 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
22924 && align < max_align)
22925 align = max_align;
22927 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
22928 to a 16byte boundary. */
22929 if (TARGET_64BIT)
22931 if (AGGREGATE_TYPE_P (type)
22932 && TYPE_SIZE (type)
22933 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22934 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
22935 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22936 return 128;
22939 if (TREE_CODE (type) == ARRAY_TYPE)
22941 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22942 return 64;
22943 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22944 return 128;
22946 else if (TREE_CODE (type) == COMPLEX_TYPE)
22949 if (TYPE_MODE (type) == DCmode && align < 64)
22950 return 64;
22951 if ((TYPE_MODE (type) == XCmode
22952 || TYPE_MODE (type) == TCmode) && align < 128)
22953 return 128;
22955 else if ((TREE_CODE (type) == RECORD_TYPE
22956 || TREE_CODE (type) == UNION_TYPE
22957 || TREE_CODE (type) == QUAL_UNION_TYPE)
22958 && TYPE_FIELDS (type))
22960 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22961 return 64;
22962 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22963 return 128;
22965 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22966 || TREE_CODE (type) == INTEGER_TYPE)
22968 if (TYPE_MODE (type) == DFmode && align < 64)
22969 return 64;
22970 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22971 return 128;
22974 return align;
22977 /* Compute the alignment for a local variable or a stack slot. EXP is
22978 the data type or decl itself, MODE is the widest mode available and
22979 ALIGN is the alignment that the object would ordinarily have. The
22980 value of this macro is used instead of that alignment to align the
22981 object. */
22983 unsigned int
22984 ix86_local_alignment (tree exp, enum machine_mode mode,
22985 unsigned int align)
22987 tree type, decl;
22989 if (exp && DECL_P (exp))
22991 type = TREE_TYPE (exp);
22992 decl = exp;
22994 else
22996 type = exp;
22997 decl = NULL;
23000 /* Don't do dynamic stack realignment for long long objects with
23001 -mpreferred-stack-boundary=2. */
23002 if (!TARGET_64BIT
23003 && align == 64
23004 && ix86_preferred_stack_boundary < 64
23005 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
23006 && (!type || !TYPE_USER_ALIGN (type))
23007 && (!decl || !DECL_USER_ALIGN (decl)))
23008 align = 32;
23010 /* If TYPE is NULL, we are allocating a stack slot for caller-save
23011 register in MODE. We will return the largest alignment of XF
23012 and DF. */
23013 if (!type)
23015 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
23016 align = GET_MODE_ALIGNMENT (DFmode);
23017 return align;
23020 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
23021 to a 16byte boundary. The exact wording is:
23023 An array uses the same alignment as its elements, except that a local or
23024 global array variable of length at least 16 bytes or
23025 a C99 variable-length array variable always has alignment of at least 16 bytes.
23027 This was added to allow use of aligned SSE instructions on arrays. The
23028 rule is meant for static storage (where the compiler cannot do the analysis
23029 by itself). We follow it for automatic variables only when convenient:
23030 we fully control everything in the function being compiled, and functions
23031 from other units cannot rely on the alignment.
23033 Exclude the va_list type. It is the common case of a local array where
23034 we cannot benefit from the alignment. */
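/* An illustrative example (not part of the original comment): a local
   "char buf[32]" in a function compiled for x86-64 with SSE enabled and
   optimized for speed falls under this rule and gets 128 bit (16 byte)
   alignment, while a local va_list is excluded and keeps its natural
   alignment. */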
23035 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
23036 && TARGET_SSE)
23038 if (AGGREGATE_TYPE_P (type)
23039 && (va_list_type_node == NULL_TREE
23040 || (TYPE_MAIN_VARIANT (type)
23041 != TYPE_MAIN_VARIANT (va_list_type_node)))
23042 && TYPE_SIZE (type)
23043 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23044 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
23045 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23046 return 128;
23048 if (TREE_CODE (type) == ARRAY_TYPE)
23050 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23051 return 64;
23052 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23053 return 128;
23055 else if (TREE_CODE (type) == COMPLEX_TYPE)
23057 if (TYPE_MODE (type) == DCmode && align < 64)
23058 return 64;
23059 if ((TYPE_MODE (type) == XCmode
23060 || TYPE_MODE (type) == TCmode) && align < 128)
23061 return 128;
23063 else if ((TREE_CODE (type) == RECORD_TYPE
23064 || TREE_CODE (type) == UNION_TYPE
23065 || TREE_CODE (type) == QUAL_UNION_TYPE)
23066 && TYPE_FIELDS (type))
23068 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23069 return 64;
23070 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23071 return 128;
23073 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23074 || TREE_CODE (type) == INTEGER_TYPE)
23077 if (TYPE_MODE (type) == DFmode && align < 64)
23078 return 64;
23079 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
23080 return 128;
23082 return align;
23085 /* Compute the minimum required alignment for dynamic stack realignment
23086 purposes for a local variable, parameter or a stack slot. EXP is
23087 the data type or decl itself, MODE is its mode and ALIGN is the
23088 alignment that the object would ordinarily have. */
23090 unsigned int
23091 ix86_minimum_alignment (tree exp, enum machine_mode mode,
23092 unsigned int align)
23094 tree type, decl;
23096 if (exp && DECL_P (exp))
23098 type = TREE_TYPE (exp);
23099 decl = exp;
23101 else
23103 type = exp;
23104 decl = NULL;
23107 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
23108 return align;
23110 /* Don't do dynamic stack realignment for long long objects with
23111 -mpreferred-stack-boundary=2. */
23112 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
23113 && (!type || !TYPE_USER_ALIGN (type))
23114 && (!decl || !DECL_USER_ALIGN (decl)))
23115 return 32;
23117 return align;
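/* Illustrative example (not part of the compiler): with -m32
   -mpreferred-stack-boundary=2 the incoming stack is only guaranteed to be
   4-byte aligned, so the 64-bit alignment of a local `long long' would
   otherwise force dynamic realignment of the whole frame.  The check above
   relaxes the required alignment to 32 bits, so

       long long
       sum (long long a, long long b)
       {
         long long t = a + b;
         return t;
       }

   compiles without a realigned frame unless the user explicitly asked for
   more alignment with __attribute__ ((aligned (8))).  */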
23120 /* Find a location for the static chain incoming to a nested function.
23121 This is a register, unless all free registers are used by arguments. */
23123 static rtx
23124 ix86_static_chain (const_tree fndecl, bool incoming_p)
23126 unsigned regno;
23128 if (!DECL_STATIC_CHAIN (fndecl))
23129 return NULL;
23131 if (TARGET_64BIT)
23133 /* We always use R10 in 64-bit mode. */
23134 regno = R10_REG;
23136 else
23138 tree fntype;
23139 /* By default in 32-bit mode we use ECX to pass the static chain. */
23140 regno = CX_REG;
23142 fntype = TREE_TYPE (fndecl);
23143 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
23145 /* Fastcall functions use ecx/edx for arguments, which leaves
23146 us with EAX for the static chain. */
23147 regno = AX_REG;
23149 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
23151 /* Thiscall functions use ecx for arguments, which leaves
23152 us with EAX for the static chain. */
23153 regno = AX_REG;
23155 else if (ix86_function_regparm (fntype, fndecl) == 3)
23157 /* For regparm 3, we have no free call-clobbered registers in
23158 which to store the static chain. In order to implement this,
23159 we have the trampoline push the static chain to the stack.
23160 However, we can't push a value below the return address when
23161 we call the nested function directly, so we have to use an
23162 alternate entry point. For this we use ESI, and have the
23163 alternate entry point push ESI, so that things appear the
23164 same once we're executing the nested function. */
23165 if (incoming_p)
23167 if (fndecl == current_function_decl)
23168 ix86_static_chain_on_stack = true;
23169 return gen_frame_mem (SImode,
23170 plus_constant (arg_pointer_rtx, -8));
23172 regno = SI_REG;
23176 return gen_rtx_REG (Pmode, regno);
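/* Illustrative example (not part of the compiler): the register chosen
   above is where a GNU C nested function receives the pointer to its
   enclosing frame.  For

       int
       outer (int x)
       {
         int inner (int y) { return x + y; }
         return inner (1);
       }

   the static chain arrives in %r10 in 64-bit code and in %ecx in plain
   32-bit code; fastcall and thiscall callees get it in %eax, and
   regparm(3) callees find it on the stack via the alternate entry point
   described above.  */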
23179 /* Emit RTL insns to initialize the variable parts of a trampoline.
23180 FNDECL is the decl of the target address; M_TRAMP is a MEM for
23181 the trampoline, and CHAIN_VALUE is an RTX for the static chain
23182 to be passed to the target function. */
23184 static void
23185 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
23187 rtx mem, fnaddr;
23189 fnaddr = XEXP (DECL_RTL (fndecl), 0);
23191 if (!TARGET_64BIT)
23193 rtx disp, chain;
23194 int opcode;
23196 /* Depending on the static chain location, either load a register
23197 with a constant, or push the constant to the stack. All of the
23198 instructions are the same size. */
23199 chain = ix86_static_chain (fndecl, true);
23200 if (REG_P (chain))
23202 if (REGNO (chain) == CX_REG)
23203 opcode = 0xb9;
23204 else if (REGNO (chain) == AX_REG)
23205 opcode = 0xb8;
23206 else
23207 gcc_unreachable ();
23209 else
23210 opcode = 0x68;
23212 mem = adjust_address (m_tramp, QImode, 0);
23213 emit_move_insn (mem, gen_int_mode (opcode, QImode));
23215 mem = adjust_address (m_tramp, SImode, 1);
23216 emit_move_insn (mem, chain_value);
23218 /* Compute offset from the end of the jmp to the target function.
23219 In the case in which the trampoline stores the static chain on
23220 the stack, we need to skip the first insn which pushes the
23221 (call-saved) register static chain; this push is 1 byte. */
23222 disp = expand_binop (SImode, sub_optab, fnaddr,
23223 plus_constant (XEXP (m_tramp, 0),
23224 MEM_P (chain) ? 9 : 10),
23225 NULL_RTX, 1, OPTAB_DIRECT);
23227 mem = adjust_address (m_tramp, QImode, 5);
23228 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
23230 mem = adjust_address (m_tramp, SImode, 6);
23231 emit_move_insn (mem, disp);
23233 else
23235 int offset = 0;
23237 /* Load the function address into r11. Try to load the address using
23238 the shorter movl instead of movabs. We may want to support
23239 movq for kernel mode, but the kernel does not use trampolines at
23240 the moment. */
23241 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
23243 fnaddr = copy_to_mode_reg (DImode, fnaddr);
23245 mem = adjust_address (m_tramp, HImode, offset);
23246 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
23248 mem = adjust_address (m_tramp, SImode, offset + 2);
23249 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
23250 offset += 6;
23252 else
23254 mem = adjust_address (m_tramp, HImode, offset);
23255 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
23257 mem = adjust_address (m_tramp, DImode, offset + 2);
23258 emit_move_insn (mem, fnaddr);
23259 offset += 10;
23262 /* Load static chain using movabs to r10. */
23263 mem = adjust_address (m_tramp, HImode, offset);
23264 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
23266 mem = adjust_address (m_tramp, DImode, offset + 2);
23267 emit_move_insn (mem, chain_value);
23268 offset += 10;
23270 /* Jump to r11; the last (unused) byte is a nop, only there to
23271 pad the write out to a single 32-bit store. */
23272 mem = adjust_address (m_tramp, SImode, offset);
23273 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
23274 offset += 4;
23276 gcc_assert (offset <= TRAMPOLINE_SIZE);
23279 #ifdef ENABLE_EXECUTE_STACK
23280 #ifdef CHECK_EXECUTE_STACK_ENABLED
23281 if (CHECK_EXECUTE_STACK_ENABLED)
23282 #endif
23283 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
23284 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
23285 #endif
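/* For reference, the bytes stored above assemble to the following
   trampoline code (illustrative disassembly, immediates shown
   symbolically):

   32-bit:
       b9/b8 <chain>     movl   $chain, %ecx / %eax   (or 68 <chain>: pushl $chain)
       e9 <disp>         jmp    <nested function>

   64-bit:
       49 bb <fnaddr>    movabs $fnaddr, %r11   (41 bb <imm32>, a movl, when
                                                  the address zero-extends)
       49 ba <chain>     movabs $chain, %r10
       49 ff e3          jmp    *%r11
       90                nop    (pads the final write to a 32-bit store)  */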
23288 /* The following file contains several enumerations and data structures
23289 built from the definitions in i386-builtin-types.def. */
23291 #include "i386-builtin-types.inc"
23293 /* Table for the ix86 builtin non-function types. */
23294 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
23296 /* Retrieve an element from the above table, building some of
23297 the types lazily. */
23299 static tree
23300 ix86_get_builtin_type (enum ix86_builtin_type tcode)
23302 unsigned int index;
23303 tree type, itype;
23305 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
23307 type = ix86_builtin_type_tab[(int) tcode];
23308 if (type != NULL)
23309 return type;
23311 gcc_assert (tcode > IX86_BT_LAST_PRIM);
23312 if (tcode <= IX86_BT_LAST_VECT)
23314 enum machine_mode mode;
23316 index = tcode - IX86_BT_LAST_PRIM - 1;
23317 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
23318 mode = ix86_builtin_type_vect_mode[index];
23320 type = build_vector_type_for_mode (itype, mode);
23322 else
23324 int quals;
23326 index = tcode - IX86_BT_LAST_VECT - 1;
23327 if (tcode <= IX86_BT_LAST_PTR)
23328 quals = TYPE_UNQUALIFIED;
23329 else
23330 quals = TYPE_QUAL_CONST;
23332 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
23333 if (quals != TYPE_UNQUALIFIED)
23334 itype = build_qualified_type (itype, quals);
23336 type = build_pointer_type (itype);
23339 ix86_builtin_type_tab[(int) tcode] = type;
23340 return type;
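/* Illustrative note (not part of the compiler): the lazy construction above
   means that, for example, the first request for a V4SF vector type calls
   build_vector_type_for_mode on the float element type and V4SFmode and
   caches the result, while a "pointer to const V4SF" code first builds the
   element type, qualifies it with TYPE_QUAL_CONST and then wraps it with
   build_pointer_type.  Subsequent requests return the cached tree.  */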
23343 /* Table for the ix86 builtin function types. */
23344 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23346 /* Retrieve an element from the above table, building some of
23347 the types lazily. */
23349 static tree
23350 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
23352 tree type;
23354 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23356 type = ix86_builtin_func_type_tab[(int) tcode];
23357 if (type != NULL)
23358 return type;
23360 if (tcode <= IX86_BT_LAST_FUNC)
23362 unsigned start = ix86_builtin_func_start[(int) tcode];
23363 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23364 tree rtype, atype, args = void_list_node;
23365 unsigned i;
23367 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23368 for (i = after - 1; i > start; --i)
23370 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23371 args = tree_cons (NULL, atype, args);
23374 type = build_function_type (rtype, args);
23376 else
23378 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23379 enum ix86_builtin_func_type icode;
23381 icode = ix86_builtin_func_alias_base[index];
23382 type = ix86_get_builtin_func_type (icode);
23385 ix86_builtin_func_type_tab[(int) tcode] = type;
23386 return type;
23390 /* Codes for all the ix86 builtins (MMX, SSE, AVX, XOP, etc.). */
23391 enum ix86_builtins
23393 IX86_BUILTIN_ADDPS,
23394 IX86_BUILTIN_ADDSS,
23395 IX86_BUILTIN_DIVPS,
23396 IX86_BUILTIN_DIVSS,
23397 IX86_BUILTIN_MULPS,
23398 IX86_BUILTIN_MULSS,
23399 IX86_BUILTIN_SUBPS,
23400 IX86_BUILTIN_SUBSS,
23402 IX86_BUILTIN_CMPEQPS,
23403 IX86_BUILTIN_CMPLTPS,
23404 IX86_BUILTIN_CMPLEPS,
23405 IX86_BUILTIN_CMPGTPS,
23406 IX86_BUILTIN_CMPGEPS,
23407 IX86_BUILTIN_CMPNEQPS,
23408 IX86_BUILTIN_CMPNLTPS,
23409 IX86_BUILTIN_CMPNLEPS,
23410 IX86_BUILTIN_CMPNGTPS,
23411 IX86_BUILTIN_CMPNGEPS,
23412 IX86_BUILTIN_CMPORDPS,
23413 IX86_BUILTIN_CMPUNORDPS,
23414 IX86_BUILTIN_CMPEQSS,
23415 IX86_BUILTIN_CMPLTSS,
23416 IX86_BUILTIN_CMPLESS,
23417 IX86_BUILTIN_CMPNEQSS,
23418 IX86_BUILTIN_CMPNLTSS,
23419 IX86_BUILTIN_CMPNLESS,
23420 IX86_BUILTIN_CMPNGTSS,
23421 IX86_BUILTIN_CMPNGESS,
23422 IX86_BUILTIN_CMPORDSS,
23423 IX86_BUILTIN_CMPUNORDSS,
23425 IX86_BUILTIN_COMIEQSS,
23426 IX86_BUILTIN_COMILTSS,
23427 IX86_BUILTIN_COMILESS,
23428 IX86_BUILTIN_COMIGTSS,
23429 IX86_BUILTIN_COMIGESS,
23430 IX86_BUILTIN_COMINEQSS,
23431 IX86_BUILTIN_UCOMIEQSS,
23432 IX86_BUILTIN_UCOMILTSS,
23433 IX86_BUILTIN_UCOMILESS,
23434 IX86_BUILTIN_UCOMIGTSS,
23435 IX86_BUILTIN_UCOMIGESS,
23436 IX86_BUILTIN_UCOMINEQSS,
23438 IX86_BUILTIN_CVTPI2PS,
23439 IX86_BUILTIN_CVTPS2PI,
23440 IX86_BUILTIN_CVTSI2SS,
23441 IX86_BUILTIN_CVTSI642SS,
23442 IX86_BUILTIN_CVTSS2SI,
23443 IX86_BUILTIN_CVTSS2SI64,
23444 IX86_BUILTIN_CVTTPS2PI,
23445 IX86_BUILTIN_CVTTSS2SI,
23446 IX86_BUILTIN_CVTTSS2SI64,
23448 IX86_BUILTIN_MAXPS,
23449 IX86_BUILTIN_MAXSS,
23450 IX86_BUILTIN_MINPS,
23451 IX86_BUILTIN_MINSS,
23453 IX86_BUILTIN_LOADUPS,
23454 IX86_BUILTIN_STOREUPS,
23455 IX86_BUILTIN_MOVSS,
23457 IX86_BUILTIN_MOVHLPS,
23458 IX86_BUILTIN_MOVLHPS,
23459 IX86_BUILTIN_LOADHPS,
23460 IX86_BUILTIN_LOADLPS,
23461 IX86_BUILTIN_STOREHPS,
23462 IX86_BUILTIN_STORELPS,
23464 IX86_BUILTIN_MASKMOVQ,
23465 IX86_BUILTIN_MOVMSKPS,
23466 IX86_BUILTIN_PMOVMSKB,
23468 IX86_BUILTIN_MOVNTPS,
23469 IX86_BUILTIN_MOVNTQ,
23471 IX86_BUILTIN_LOADDQU,
23472 IX86_BUILTIN_STOREDQU,
23474 IX86_BUILTIN_PACKSSWB,
23475 IX86_BUILTIN_PACKSSDW,
23476 IX86_BUILTIN_PACKUSWB,
23478 IX86_BUILTIN_PADDB,
23479 IX86_BUILTIN_PADDW,
23480 IX86_BUILTIN_PADDD,
23481 IX86_BUILTIN_PADDQ,
23482 IX86_BUILTIN_PADDSB,
23483 IX86_BUILTIN_PADDSW,
23484 IX86_BUILTIN_PADDUSB,
23485 IX86_BUILTIN_PADDUSW,
23486 IX86_BUILTIN_PSUBB,
23487 IX86_BUILTIN_PSUBW,
23488 IX86_BUILTIN_PSUBD,
23489 IX86_BUILTIN_PSUBQ,
23490 IX86_BUILTIN_PSUBSB,
23491 IX86_BUILTIN_PSUBSW,
23492 IX86_BUILTIN_PSUBUSB,
23493 IX86_BUILTIN_PSUBUSW,
23495 IX86_BUILTIN_PAND,
23496 IX86_BUILTIN_PANDN,
23497 IX86_BUILTIN_POR,
23498 IX86_BUILTIN_PXOR,
23500 IX86_BUILTIN_PAVGB,
23501 IX86_BUILTIN_PAVGW,
23503 IX86_BUILTIN_PCMPEQB,
23504 IX86_BUILTIN_PCMPEQW,
23505 IX86_BUILTIN_PCMPEQD,
23506 IX86_BUILTIN_PCMPGTB,
23507 IX86_BUILTIN_PCMPGTW,
23508 IX86_BUILTIN_PCMPGTD,
23510 IX86_BUILTIN_PMADDWD,
23512 IX86_BUILTIN_PMAXSW,
23513 IX86_BUILTIN_PMAXUB,
23514 IX86_BUILTIN_PMINSW,
23515 IX86_BUILTIN_PMINUB,
23517 IX86_BUILTIN_PMULHUW,
23518 IX86_BUILTIN_PMULHW,
23519 IX86_BUILTIN_PMULLW,
23521 IX86_BUILTIN_PSADBW,
23522 IX86_BUILTIN_PSHUFW,
23524 IX86_BUILTIN_PSLLW,
23525 IX86_BUILTIN_PSLLD,
23526 IX86_BUILTIN_PSLLQ,
23527 IX86_BUILTIN_PSRAW,
23528 IX86_BUILTIN_PSRAD,
23529 IX86_BUILTIN_PSRLW,
23530 IX86_BUILTIN_PSRLD,
23531 IX86_BUILTIN_PSRLQ,
23532 IX86_BUILTIN_PSLLWI,
23533 IX86_BUILTIN_PSLLDI,
23534 IX86_BUILTIN_PSLLQI,
23535 IX86_BUILTIN_PSRAWI,
23536 IX86_BUILTIN_PSRADI,
23537 IX86_BUILTIN_PSRLWI,
23538 IX86_BUILTIN_PSRLDI,
23539 IX86_BUILTIN_PSRLQI,
23541 IX86_BUILTIN_PUNPCKHBW,
23542 IX86_BUILTIN_PUNPCKHWD,
23543 IX86_BUILTIN_PUNPCKHDQ,
23544 IX86_BUILTIN_PUNPCKLBW,
23545 IX86_BUILTIN_PUNPCKLWD,
23546 IX86_BUILTIN_PUNPCKLDQ,
23548 IX86_BUILTIN_SHUFPS,
23550 IX86_BUILTIN_RCPPS,
23551 IX86_BUILTIN_RCPSS,
23552 IX86_BUILTIN_RSQRTPS,
23553 IX86_BUILTIN_RSQRTPS_NR,
23554 IX86_BUILTIN_RSQRTSS,
23555 IX86_BUILTIN_RSQRTF,
23556 IX86_BUILTIN_SQRTPS,
23557 IX86_BUILTIN_SQRTPS_NR,
23558 IX86_BUILTIN_SQRTSS,
23560 IX86_BUILTIN_UNPCKHPS,
23561 IX86_BUILTIN_UNPCKLPS,
23563 IX86_BUILTIN_ANDPS,
23564 IX86_BUILTIN_ANDNPS,
23565 IX86_BUILTIN_ORPS,
23566 IX86_BUILTIN_XORPS,
23568 IX86_BUILTIN_EMMS,
23569 IX86_BUILTIN_LDMXCSR,
23570 IX86_BUILTIN_STMXCSR,
23571 IX86_BUILTIN_SFENCE,
23573 /* 3DNow! Original */
23574 IX86_BUILTIN_FEMMS,
23575 IX86_BUILTIN_PAVGUSB,
23576 IX86_BUILTIN_PF2ID,
23577 IX86_BUILTIN_PFACC,
23578 IX86_BUILTIN_PFADD,
23579 IX86_BUILTIN_PFCMPEQ,
23580 IX86_BUILTIN_PFCMPGE,
23581 IX86_BUILTIN_PFCMPGT,
23582 IX86_BUILTIN_PFMAX,
23583 IX86_BUILTIN_PFMIN,
23584 IX86_BUILTIN_PFMUL,
23585 IX86_BUILTIN_PFRCP,
23586 IX86_BUILTIN_PFRCPIT1,
23587 IX86_BUILTIN_PFRCPIT2,
23588 IX86_BUILTIN_PFRSQIT1,
23589 IX86_BUILTIN_PFRSQRT,
23590 IX86_BUILTIN_PFSUB,
23591 IX86_BUILTIN_PFSUBR,
23592 IX86_BUILTIN_PI2FD,
23593 IX86_BUILTIN_PMULHRW,
23595 /* 3DNow! Athlon Extensions */
23596 IX86_BUILTIN_PF2IW,
23597 IX86_BUILTIN_PFNACC,
23598 IX86_BUILTIN_PFPNACC,
23599 IX86_BUILTIN_PI2FW,
23600 IX86_BUILTIN_PSWAPDSI,
23601 IX86_BUILTIN_PSWAPDSF,
23603 /* SSE2 */
23604 IX86_BUILTIN_ADDPD,
23605 IX86_BUILTIN_ADDSD,
23606 IX86_BUILTIN_DIVPD,
23607 IX86_BUILTIN_DIVSD,
23608 IX86_BUILTIN_MULPD,
23609 IX86_BUILTIN_MULSD,
23610 IX86_BUILTIN_SUBPD,
23611 IX86_BUILTIN_SUBSD,
23613 IX86_BUILTIN_CMPEQPD,
23614 IX86_BUILTIN_CMPLTPD,
23615 IX86_BUILTIN_CMPLEPD,
23616 IX86_BUILTIN_CMPGTPD,
23617 IX86_BUILTIN_CMPGEPD,
23618 IX86_BUILTIN_CMPNEQPD,
23619 IX86_BUILTIN_CMPNLTPD,
23620 IX86_BUILTIN_CMPNLEPD,
23621 IX86_BUILTIN_CMPNGTPD,
23622 IX86_BUILTIN_CMPNGEPD,
23623 IX86_BUILTIN_CMPORDPD,
23624 IX86_BUILTIN_CMPUNORDPD,
23625 IX86_BUILTIN_CMPEQSD,
23626 IX86_BUILTIN_CMPLTSD,
23627 IX86_BUILTIN_CMPLESD,
23628 IX86_BUILTIN_CMPNEQSD,
23629 IX86_BUILTIN_CMPNLTSD,
23630 IX86_BUILTIN_CMPNLESD,
23631 IX86_BUILTIN_CMPORDSD,
23632 IX86_BUILTIN_CMPUNORDSD,
23634 IX86_BUILTIN_COMIEQSD,
23635 IX86_BUILTIN_COMILTSD,
23636 IX86_BUILTIN_COMILESD,
23637 IX86_BUILTIN_COMIGTSD,
23638 IX86_BUILTIN_COMIGESD,
23639 IX86_BUILTIN_COMINEQSD,
23640 IX86_BUILTIN_UCOMIEQSD,
23641 IX86_BUILTIN_UCOMILTSD,
23642 IX86_BUILTIN_UCOMILESD,
23643 IX86_BUILTIN_UCOMIGTSD,
23644 IX86_BUILTIN_UCOMIGESD,
23645 IX86_BUILTIN_UCOMINEQSD,
23647 IX86_BUILTIN_MAXPD,
23648 IX86_BUILTIN_MAXSD,
23649 IX86_BUILTIN_MINPD,
23650 IX86_BUILTIN_MINSD,
23652 IX86_BUILTIN_ANDPD,
23653 IX86_BUILTIN_ANDNPD,
23654 IX86_BUILTIN_ORPD,
23655 IX86_BUILTIN_XORPD,
23657 IX86_BUILTIN_SQRTPD,
23658 IX86_BUILTIN_SQRTSD,
23660 IX86_BUILTIN_UNPCKHPD,
23661 IX86_BUILTIN_UNPCKLPD,
23663 IX86_BUILTIN_SHUFPD,
23665 IX86_BUILTIN_LOADUPD,
23666 IX86_BUILTIN_STOREUPD,
23667 IX86_BUILTIN_MOVSD,
23669 IX86_BUILTIN_LOADHPD,
23670 IX86_BUILTIN_LOADLPD,
23672 IX86_BUILTIN_CVTDQ2PD,
23673 IX86_BUILTIN_CVTDQ2PS,
23675 IX86_BUILTIN_CVTPD2DQ,
23676 IX86_BUILTIN_CVTPD2PI,
23677 IX86_BUILTIN_CVTPD2PS,
23678 IX86_BUILTIN_CVTTPD2DQ,
23679 IX86_BUILTIN_CVTTPD2PI,
23681 IX86_BUILTIN_CVTPI2PD,
23682 IX86_BUILTIN_CVTSI2SD,
23683 IX86_BUILTIN_CVTSI642SD,
23685 IX86_BUILTIN_CVTSD2SI,
23686 IX86_BUILTIN_CVTSD2SI64,
23687 IX86_BUILTIN_CVTSD2SS,
23688 IX86_BUILTIN_CVTSS2SD,
23689 IX86_BUILTIN_CVTTSD2SI,
23690 IX86_BUILTIN_CVTTSD2SI64,
23692 IX86_BUILTIN_CVTPS2DQ,
23693 IX86_BUILTIN_CVTPS2PD,
23694 IX86_BUILTIN_CVTTPS2DQ,
23696 IX86_BUILTIN_MOVNTI,
23697 IX86_BUILTIN_MOVNTPD,
23698 IX86_BUILTIN_MOVNTDQ,
23700 IX86_BUILTIN_MOVQ128,
23702 /* SSE2 MMX */
23703 IX86_BUILTIN_MASKMOVDQU,
23704 IX86_BUILTIN_MOVMSKPD,
23705 IX86_BUILTIN_PMOVMSKB128,
23707 IX86_BUILTIN_PACKSSWB128,
23708 IX86_BUILTIN_PACKSSDW128,
23709 IX86_BUILTIN_PACKUSWB128,
23711 IX86_BUILTIN_PADDB128,
23712 IX86_BUILTIN_PADDW128,
23713 IX86_BUILTIN_PADDD128,
23714 IX86_BUILTIN_PADDQ128,
23715 IX86_BUILTIN_PADDSB128,
23716 IX86_BUILTIN_PADDSW128,
23717 IX86_BUILTIN_PADDUSB128,
23718 IX86_BUILTIN_PADDUSW128,
23719 IX86_BUILTIN_PSUBB128,
23720 IX86_BUILTIN_PSUBW128,
23721 IX86_BUILTIN_PSUBD128,
23722 IX86_BUILTIN_PSUBQ128,
23723 IX86_BUILTIN_PSUBSB128,
23724 IX86_BUILTIN_PSUBSW128,
23725 IX86_BUILTIN_PSUBUSB128,
23726 IX86_BUILTIN_PSUBUSW128,
23728 IX86_BUILTIN_PAND128,
23729 IX86_BUILTIN_PANDN128,
23730 IX86_BUILTIN_POR128,
23731 IX86_BUILTIN_PXOR128,
23733 IX86_BUILTIN_PAVGB128,
23734 IX86_BUILTIN_PAVGW128,
23736 IX86_BUILTIN_PCMPEQB128,
23737 IX86_BUILTIN_PCMPEQW128,
23738 IX86_BUILTIN_PCMPEQD128,
23739 IX86_BUILTIN_PCMPGTB128,
23740 IX86_BUILTIN_PCMPGTW128,
23741 IX86_BUILTIN_PCMPGTD128,
23743 IX86_BUILTIN_PMADDWD128,
23745 IX86_BUILTIN_PMAXSW128,
23746 IX86_BUILTIN_PMAXUB128,
23747 IX86_BUILTIN_PMINSW128,
23748 IX86_BUILTIN_PMINUB128,
23750 IX86_BUILTIN_PMULUDQ,
23751 IX86_BUILTIN_PMULUDQ128,
23752 IX86_BUILTIN_PMULHUW128,
23753 IX86_BUILTIN_PMULHW128,
23754 IX86_BUILTIN_PMULLW128,
23756 IX86_BUILTIN_PSADBW128,
23757 IX86_BUILTIN_PSHUFHW,
23758 IX86_BUILTIN_PSHUFLW,
23759 IX86_BUILTIN_PSHUFD,
23761 IX86_BUILTIN_PSLLDQI128,
23762 IX86_BUILTIN_PSLLWI128,
23763 IX86_BUILTIN_PSLLDI128,
23764 IX86_BUILTIN_PSLLQI128,
23765 IX86_BUILTIN_PSRAWI128,
23766 IX86_BUILTIN_PSRADI128,
23767 IX86_BUILTIN_PSRLDQI128,
23768 IX86_BUILTIN_PSRLWI128,
23769 IX86_BUILTIN_PSRLDI128,
23770 IX86_BUILTIN_PSRLQI128,
23772 IX86_BUILTIN_PSLLDQ128,
23773 IX86_BUILTIN_PSLLW128,
23774 IX86_BUILTIN_PSLLD128,
23775 IX86_BUILTIN_PSLLQ128,
23776 IX86_BUILTIN_PSRAW128,
23777 IX86_BUILTIN_PSRAD128,
23778 IX86_BUILTIN_PSRLW128,
23779 IX86_BUILTIN_PSRLD128,
23780 IX86_BUILTIN_PSRLQ128,
23782 IX86_BUILTIN_PUNPCKHBW128,
23783 IX86_BUILTIN_PUNPCKHWD128,
23784 IX86_BUILTIN_PUNPCKHDQ128,
23785 IX86_BUILTIN_PUNPCKHQDQ128,
23786 IX86_BUILTIN_PUNPCKLBW128,
23787 IX86_BUILTIN_PUNPCKLWD128,
23788 IX86_BUILTIN_PUNPCKLDQ128,
23789 IX86_BUILTIN_PUNPCKLQDQ128,
23791 IX86_BUILTIN_CLFLUSH,
23792 IX86_BUILTIN_MFENCE,
23793 IX86_BUILTIN_LFENCE,
23795 IX86_BUILTIN_BSRSI,
23796 IX86_BUILTIN_BSRDI,
23797 IX86_BUILTIN_RDPMC,
23798 IX86_BUILTIN_RDTSC,
23799 IX86_BUILTIN_RDTSCP,
23800 IX86_BUILTIN_ROLQI,
23801 IX86_BUILTIN_ROLHI,
23802 IX86_BUILTIN_RORQI,
23803 IX86_BUILTIN_RORHI,
23805 /* SSE3. */
23806 IX86_BUILTIN_ADDSUBPS,
23807 IX86_BUILTIN_HADDPS,
23808 IX86_BUILTIN_HSUBPS,
23809 IX86_BUILTIN_MOVSHDUP,
23810 IX86_BUILTIN_MOVSLDUP,
23811 IX86_BUILTIN_ADDSUBPD,
23812 IX86_BUILTIN_HADDPD,
23813 IX86_BUILTIN_HSUBPD,
23814 IX86_BUILTIN_LDDQU,
23816 IX86_BUILTIN_MONITOR,
23817 IX86_BUILTIN_MWAIT,
23819 /* SSSE3. */
23820 IX86_BUILTIN_PHADDW,
23821 IX86_BUILTIN_PHADDD,
23822 IX86_BUILTIN_PHADDSW,
23823 IX86_BUILTIN_PHSUBW,
23824 IX86_BUILTIN_PHSUBD,
23825 IX86_BUILTIN_PHSUBSW,
23826 IX86_BUILTIN_PMADDUBSW,
23827 IX86_BUILTIN_PMULHRSW,
23828 IX86_BUILTIN_PSHUFB,
23829 IX86_BUILTIN_PSIGNB,
23830 IX86_BUILTIN_PSIGNW,
23831 IX86_BUILTIN_PSIGND,
23832 IX86_BUILTIN_PALIGNR,
23833 IX86_BUILTIN_PABSB,
23834 IX86_BUILTIN_PABSW,
23835 IX86_BUILTIN_PABSD,
23837 IX86_BUILTIN_PHADDW128,
23838 IX86_BUILTIN_PHADDD128,
23839 IX86_BUILTIN_PHADDSW128,
23840 IX86_BUILTIN_PHSUBW128,
23841 IX86_BUILTIN_PHSUBD128,
23842 IX86_BUILTIN_PHSUBSW128,
23843 IX86_BUILTIN_PMADDUBSW128,
23844 IX86_BUILTIN_PMULHRSW128,
23845 IX86_BUILTIN_PSHUFB128,
23846 IX86_BUILTIN_PSIGNB128,
23847 IX86_BUILTIN_PSIGNW128,
23848 IX86_BUILTIN_PSIGND128,
23849 IX86_BUILTIN_PALIGNR128,
23850 IX86_BUILTIN_PABSB128,
23851 IX86_BUILTIN_PABSW128,
23852 IX86_BUILTIN_PABSD128,
23854 /* AMDFAM10 - SSE4A New Instructions. */
23855 IX86_BUILTIN_MOVNTSD,
23856 IX86_BUILTIN_MOVNTSS,
23857 IX86_BUILTIN_EXTRQI,
23858 IX86_BUILTIN_EXTRQ,
23859 IX86_BUILTIN_INSERTQI,
23860 IX86_BUILTIN_INSERTQ,
23862 /* SSE4.1. */
23863 IX86_BUILTIN_BLENDPD,
23864 IX86_BUILTIN_BLENDPS,
23865 IX86_BUILTIN_BLENDVPD,
23866 IX86_BUILTIN_BLENDVPS,
23867 IX86_BUILTIN_PBLENDVB128,
23868 IX86_BUILTIN_PBLENDW128,
23870 IX86_BUILTIN_DPPD,
23871 IX86_BUILTIN_DPPS,
23873 IX86_BUILTIN_INSERTPS128,
23875 IX86_BUILTIN_MOVNTDQA,
23876 IX86_BUILTIN_MPSADBW128,
23877 IX86_BUILTIN_PACKUSDW128,
23878 IX86_BUILTIN_PCMPEQQ,
23879 IX86_BUILTIN_PHMINPOSUW128,
23881 IX86_BUILTIN_PMAXSB128,
23882 IX86_BUILTIN_PMAXSD128,
23883 IX86_BUILTIN_PMAXUD128,
23884 IX86_BUILTIN_PMAXUW128,
23886 IX86_BUILTIN_PMINSB128,
23887 IX86_BUILTIN_PMINSD128,
23888 IX86_BUILTIN_PMINUD128,
23889 IX86_BUILTIN_PMINUW128,
23891 IX86_BUILTIN_PMOVSXBW128,
23892 IX86_BUILTIN_PMOVSXBD128,
23893 IX86_BUILTIN_PMOVSXBQ128,
23894 IX86_BUILTIN_PMOVSXWD128,
23895 IX86_BUILTIN_PMOVSXWQ128,
23896 IX86_BUILTIN_PMOVSXDQ128,
23898 IX86_BUILTIN_PMOVZXBW128,
23899 IX86_BUILTIN_PMOVZXBD128,
23900 IX86_BUILTIN_PMOVZXBQ128,
23901 IX86_BUILTIN_PMOVZXWD128,
23902 IX86_BUILTIN_PMOVZXWQ128,
23903 IX86_BUILTIN_PMOVZXDQ128,
23905 IX86_BUILTIN_PMULDQ128,
23906 IX86_BUILTIN_PMULLD128,
23908 IX86_BUILTIN_ROUNDPD,
23909 IX86_BUILTIN_ROUNDPS,
23910 IX86_BUILTIN_ROUNDSD,
23911 IX86_BUILTIN_ROUNDSS,
23913 IX86_BUILTIN_PTESTZ,
23914 IX86_BUILTIN_PTESTC,
23915 IX86_BUILTIN_PTESTNZC,
23917 IX86_BUILTIN_VEC_INIT_V2SI,
23918 IX86_BUILTIN_VEC_INIT_V4HI,
23919 IX86_BUILTIN_VEC_INIT_V8QI,
23920 IX86_BUILTIN_VEC_EXT_V2DF,
23921 IX86_BUILTIN_VEC_EXT_V2DI,
23922 IX86_BUILTIN_VEC_EXT_V4SF,
23923 IX86_BUILTIN_VEC_EXT_V4SI,
23924 IX86_BUILTIN_VEC_EXT_V8HI,
23925 IX86_BUILTIN_VEC_EXT_V2SI,
23926 IX86_BUILTIN_VEC_EXT_V4HI,
23927 IX86_BUILTIN_VEC_EXT_V16QI,
23928 IX86_BUILTIN_VEC_SET_V2DI,
23929 IX86_BUILTIN_VEC_SET_V4SF,
23930 IX86_BUILTIN_VEC_SET_V4SI,
23931 IX86_BUILTIN_VEC_SET_V8HI,
23932 IX86_BUILTIN_VEC_SET_V4HI,
23933 IX86_BUILTIN_VEC_SET_V16QI,
23935 IX86_BUILTIN_VEC_PACK_SFIX,
23937 /* SSE4.2. */
23938 IX86_BUILTIN_CRC32QI,
23939 IX86_BUILTIN_CRC32HI,
23940 IX86_BUILTIN_CRC32SI,
23941 IX86_BUILTIN_CRC32DI,
23943 IX86_BUILTIN_PCMPESTRI128,
23944 IX86_BUILTIN_PCMPESTRM128,
23945 IX86_BUILTIN_PCMPESTRA128,
23946 IX86_BUILTIN_PCMPESTRC128,
23947 IX86_BUILTIN_PCMPESTRO128,
23948 IX86_BUILTIN_PCMPESTRS128,
23949 IX86_BUILTIN_PCMPESTRZ128,
23950 IX86_BUILTIN_PCMPISTRI128,
23951 IX86_BUILTIN_PCMPISTRM128,
23952 IX86_BUILTIN_PCMPISTRA128,
23953 IX86_BUILTIN_PCMPISTRC128,
23954 IX86_BUILTIN_PCMPISTRO128,
23955 IX86_BUILTIN_PCMPISTRS128,
23956 IX86_BUILTIN_PCMPISTRZ128,
23958 IX86_BUILTIN_PCMPGTQ,
23960 /* AES instructions */
23961 IX86_BUILTIN_AESENC128,
23962 IX86_BUILTIN_AESENCLAST128,
23963 IX86_BUILTIN_AESDEC128,
23964 IX86_BUILTIN_AESDECLAST128,
23965 IX86_BUILTIN_AESIMC128,
23966 IX86_BUILTIN_AESKEYGENASSIST128,
23968 /* PCLMUL instruction */
23969 IX86_BUILTIN_PCLMULQDQ128,
23971 /* AVX */
23972 IX86_BUILTIN_ADDPD256,
23973 IX86_BUILTIN_ADDPS256,
23974 IX86_BUILTIN_ADDSUBPD256,
23975 IX86_BUILTIN_ADDSUBPS256,
23976 IX86_BUILTIN_ANDPD256,
23977 IX86_BUILTIN_ANDPS256,
23978 IX86_BUILTIN_ANDNPD256,
23979 IX86_BUILTIN_ANDNPS256,
23980 IX86_BUILTIN_BLENDPD256,
23981 IX86_BUILTIN_BLENDPS256,
23982 IX86_BUILTIN_BLENDVPD256,
23983 IX86_BUILTIN_BLENDVPS256,
23984 IX86_BUILTIN_DIVPD256,
23985 IX86_BUILTIN_DIVPS256,
23986 IX86_BUILTIN_DPPS256,
23987 IX86_BUILTIN_HADDPD256,
23988 IX86_BUILTIN_HADDPS256,
23989 IX86_BUILTIN_HSUBPD256,
23990 IX86_BUILTIN_HSUBPS256,
23991 IX86_BUILTIN_MAXPD256,
23992 IX86_BUILTIN_MAXPS256,
23993 IX86_BUILTIN_MINPD256,
23994 IX86_BUILTIN_MINPS256,
23995 IX86_BUILTIN_MULPD256,
23996 IX86_BUILTIN_MULPS256,
23997 IX86_BUILTIN_ORPD256,
23998 IX86_BUILTIN_ORPS256,
23999 IX86_BUILTIN_SHUFPD256,
24000 IX86_BUILTIN_SHUFPS256,
24001 IX86_BUILTIN_SUBPD256,
24002 IX86_BUILTIN_SUBPS256,
24003 IX86_BUILTIN_XORPD256,
24004 IX86_BUILTIN_XORPS256,
24005 IX86_BUILTIN_CMPSD,
24006 IX86_BUILTIN_CMPSS,
24007 IX86_BUILTIN_CMPPD,
24008 IX86_BUILTIN_CMPPS,
24009 IX86_BUILTIN_CMPPD256,
24010 IX86_BUILTIN_CMPPS256,
24011 IX86_BUILTIN_CVTDQ2PD256,
24012 IX86_BUILTIN_CVTDQ2PS256,
24013 IX86_BUILTIN_CVTPD2PS256,
24014 IX86_BUILTIN_CVTPS2DQ256,
24015 IX86_BUILTIN_CVTPS2PD256,
24016 IX86_BUILTIN_CVTTPD2DQ256,
24017 IX86_BUILTIN_CVTPD2DQ256,
24018 IX86_BUILTIN_CVTTPS2DQ256,
24019 IX86_BUILTIN_EXTRACTF128PD256,
24020 IX86_BUILTIN_EXTRACTF128PS256,
24021 IX86_BUILTIN_EXTRACTF128SI256,
24022 IX86_BUILTIN_VZEROALL,
24023 IX86_BUILTIN_VZEROUPPER,
24024 IX86_BUILTIN_VPERMILVARPD,
24025 IX86_BUILTIN_VPERMILVARPS,
24026 IX86_BUILTIN_VPERMILVARPD256,
24027 IX86_BUILTIN_VPERMILVARPS256,
24028 IX86_BUILTIN_VPERMILPD,
24029 IX86_BUILTIN_VPERMILPS,
24030 IX86_BUILTIN_VPERMILPD256,
24031 IX86_BUILTIN_VPERMILPS256,
24032 IX86_BUILTIN_VPERMIL2PD,
24033 IX86_BUILTIN_VPERMIL2PS,
24034 IX86_BUILTIN_VPERMIL2PD256,
24035 IX86_BUILTIN_VPERMIL2PS256,
24036 IX86_BUILTIN_VPERM2F128PD256,
24037 IX86_BUILTIN_VPERM2F128PS256,
24038 IX86_BUILTIN_VPERM2F128SI256,
24039 IX86_BUILTIN_VBROADCASTSS,
24040 IX86_BUILTIN_VBROADCASTSD256,
24041 IX86_BUILTIN_VBROADCASTSS256,
24042 IX86_BUILTIN_VBROADCASTPD256,
24043 IX86_BUILTIN_VBROADCASTPS256,
24044 IX86_BUILTIN_VINSERTF128PD256,
24045 IX86_BUILTIN_VINSERTF128PS256,
24046 IX86_BUILTIN_VINSERTF128SI256,
24047 IX86_BUILTIN_LOADUPD256,
24048 IX86_BUILTIN_LOADUPS256,
24049 IX86_BUILTIN_STOREUPD256,
24050 IX86_BUILTIN_STOREUPS256,
24051 IX86_BUILTIN_LDDQU256,
24052 IX86_BUILTIN_MOVNTDQ256,
24053 IX86_BUILTIN_MOVNTPD256,
24054 IX86_BUILTIN_MOVNTPS256,
24055 IX86_BUILTIN_LOADDQU256,
24056 IX86_BUILTIN_STOREDQU256,
24057 IX86_BUILTIN_MASKLOADPD,
24058 IX86_BUILTIN_MASKLOADPS,
24059 IX86_BUILTIN_MASKSTOREPD,
24060 IX86_BUILTIN_MASKSTOREPS,
24061 IX86_BUILTIN_MASKLOADPD256,
24062 IX86_BUILTIN_MASKLOADPS256,
24063 IX86_BUILTIN_MASKSTOREPD256,
24064 IX86_BUILTIN_MASKSTOREPS256,
24065 IX86_BUILTIN_MOVSHDUP256,
24066 IX86_BUILTIN_MOVSLDUP256,
24067 IX86_BUILTIN_MOVDDUP256,
24069 IX86_BUILTIN_SQRTPD256,
24070 IX86_BUILTIN_SQRTPS256,
24071 IX86_BUILTIN_SQRTPS_NR256,
24072 IX86_BUILTIN_RSQRTPS256,
24073 IX86_BUILTIN_RSQRTPS_NR256,
24075 IX86_BUILTIN_RCPPS256,
24077 IX86_BUILTIN_ROUNDPD256,
24078 IX86_BUILTIN_ROUNDPS256,
24080 IX86_BUILTIN_UNPCKHPD256,
24081 IX86_BUILTIN_UNPCKLPD256,
24082 IX86_BUILTIN_UNPCKHPS256,
24083 IX86_BUILTIN_UNPCKLPS256,
24085 IX86_BUILTIN_SI256_SI,
24086 IX86_BUILTIN_PS256_PS,
24087 IX86_BUILTIN_PD256_PD,
24088 IX86_BUILTIN_SI_SI256,
24089 IX86_BUILTIN_PS_PS256,
24090 IX86_BUILTIN_PD_PD256,
24092 IX86_BUILTIN_VTESTZPD,
24093 IX86_BUILTIN_VTESTCPD,
24094 IX86_BUILTIN_VTESTNZCPD,
24095 IX86_BUILTIN_VTESTZPS,
24096 IX86_BUILTIN_VTESTCPS,
24097 IX86_BUILTIN_VTESTNZCPS,
24098 IX86_BUILTIN_VTESTZPD256,
24099 IX86_BUILTIN_VTESTCPD256,
24100 IX86_BUILTIN_VTESTNZCPD256,
24101 IX86_BUILTIN_VTESTZPS256,
24102 IX86_BUILTIN_VTESTCPS256,
24103 IX86_BUILTIN_VTESTNZCPS256,
24104 IX86_BUILTIN_PTESTZ256,
24105 IX86_BUILTIN_PTESTC256,
24106 IX86_BUILTIN_PTESTNZC256,
24108 IX86_BUILTIN_MOVMSKPD256,
24109 IX86_BUILTIN_MOVMSKPS256,
24111 /* TFmode support builtins. */
24112 IX86_BUILTIN_INFQ,
24113 IX86_BUILTIN_HUGE_VALQ,
24114 IX86_BUILTIN_FABSQ,
24115 IX86_BUILTIN_COPYSIGNQ,
24117 /* Vectorizer support builtins. */
24118 IX86_BUILTIN_CPYSGNPS,
24119 IX86_BUILTIN_CPYSGNPD,
24120 IX86_BUILTIN_CPYSGNPS256,
24121 IX86_BUILTIN_CPYSGNPD256,
24123 IX86_BUILTIN_CVTUDQ2PS,
24125 IX86_BUILTIN_VEC_PERM_V2DF,
24126 IX86_BUILTIN_VEC_PERM_V4SF,
24127 IX86_BUILTIN_VEC_PERM_V2DI,
24128 IX86_BUILTIN_VEC_PERM_V4SI,
24129 IX86_BUILTIN_VEC_PERM_V8HI,
24130 IX86_BUILTIN_VEC_PERM_V16QI,
24131 IX86_BUILTIN_VEC_PERM_V2DI_U,
24132 IX86_BUILTIN_VEC_PERM_V4SI_U,
24133 IX86_BUILTIN_VEC_PERM_V8HI_U,
24134 IX86_BUILTIN_VEC_PERM_V16QI_U,
24135 IX86_BUILTIN_VEC_PERM_V4DF,
24136 IX86_BUILTIN_VEC_PERM_V8SF,
24138 /* FMA4 and XOP instructions. */
24139 IX86_BUILTIN_VFMADDSS,
24140 IX86_BUILTIN_VFMADDSD,
24141 IX86_BUILTIN_VFMADDPS,
24142 IX86_BUILTIN_VFMADDPD,
24143 IX86_BUILTIN_VFMADDPS256,
24144 IX86_BUILTIN_VFMADDPD256,
24145 IX86_BUILTIN_VFMADDSUBPS,
24146 IX86_BUILTIN_VFMADDSUBPD,
24147 IX86_BUILTIN_VFMADDSUBPS256,
24148 IX86_BUILTIN_VFMADDSUBPD256,
24150 IX86_BUILTIN_VPCMOV,
24151 IX86_BUILTIN_VPCMOV_V2DI,
24152 IX86_BUILTIN_VPCMOV_V4SI,
24153 IX86_BUILTIN_VPCMOV_V8HI,
24154 IX86_BUILTIN_VPCMOV_V16QI,
24155 IX86_BUILTIN_VPCMOV_V4SF,
24156 IX86_BUILTIN_VPCMOV_V2DF,
24157 IX86_BUILTIN_VPCMOV256,
24158 IX86_BUILTIN_VPCMOV_V4DI256,
24159 IX86_BUILTIN_VPCMOV_V8SI256,
24160 IX86_BUILTIN_VPCMOV_V16HI256,
24161 IX86_BUILTIN_VPCMOV_V32QI256,
24162 IX86_BUILTIN_VPCMOV_V8SF256,
24163 IX86_BUILTIN_VPCMOV_V4DF256,
24165 IX86_BUILTIN_VPPERM,
24167 IX86_BUILTIN_VPMACSSWW,
24168 IX86_BUILTIN_VPMACSWW,
24169 IX86_BUILTIN_VPMACSSWD,
24170 IX86_BUILTIN_VPMACSWD,
24171 IX86_BUILTIN_VPMACSSDD,
24172 IX86_BUILTIN_VPMACSDD,
24173 IX86_BUILTIN_VPMACSSDQL,
24174 IX86_BUILTIN_VPMACSSDQH,
24175 IX86_BUILTIN_VPMACSDQL,
24176 IX86_BUILTIN_VPMACSDQH,
24177 IX86_BUILTIN_VPMADCSSWD,
24178 IX86_BUILTIN_VPMADCSWD,
24180 IX86_BUILTIN_VPHADDBW,
24181 IX86_BUILTIN_VPHADDBD,
24182 IX86_BUILTIN_VPHADDBQ,
24183 IX86_BUILTIN_VPHADDWD,
24184 IX86_BUILTIN_VPHADDWQ,
24185 IX86_BUILTIN_VPHADDDQ,
24186 IX86_BUILTIN_VPHADDUBW,
24187 IX86_BUILTIN_VPHADDUBD,
24188 IX86_BUILTIN_VPHADDUBQ,
24189 IX86_BUILTIN_VPHADDUWD,
24190 IX86_BUILTIN_VPHADDUWQ,
24191 IX86_BUILTIN_VPHADDUDQ,
24192 IX86_BUILTIN_VPHSUBBW,
24193 IX86_BUILTIN_VPHSUBWD,
24194 IX86_BUILTIN_VPHSUBDQ,
24196 IX86_BUILTIN_VPROTB,
24197 IX86_BUILTIN_VPROTW,
24198 IX86_BUILTIN_VPROTD,
24199 IX86_BUILTIN_VPROTQ,
24200 IX86_BUILTIN_VPROTB_IMM,
24201 IX86_BUILTIN_VPROTW_IMM,
24202 IX86_BUILTIN_VPROTD_IMM,
24203 IX86_BUILTIN_VPROTQ_IMM,
24205 IX86_BUILTIN_VPSHLB,
24206 IX86_BUILTIN_VPSHLW,
24207 IX86_BUILTIN_VPSHLD,
24208 IX86_BUILTIN_VPSHLQ,
24209 IX86_BUILTIN_VPSHAB,
24210 IX86_BUILTIN_VPSHAW,
24211 IX86_BUILTIN_VPSHAD,
24212 IX86_BUILTIN_VPSHAQ,
24214 IX86_BUILTIN_VFRCZSS,
24215 IX86_BUILTIN_VFRCZSD,
24216 IX86_BUILTIN_VFRCZPS,
24217 IX86_BUILTIN_VFRCZPD,
24218 IX86_BUILTIN_VFRCZPS256,
24219 IX86_BUILTIN_VFRCZPD256,
24221 IX86_BUILTIN_VPCOMEQUB,
24222 IX86_BUILTIN_VPCOMNEUB,
24223 IX86_BUILTIN_VPCOMLTUB,
24224 IX86_BUILTIN_VPCOMLEUB,
24225 IX86_BUILTIN_VPCOMGTUB,
24226 IX86_BUILTIN_VPCOMGEUB,
24227 IX86_BUILTIN_VPCOMFALSEUB,
24228 IX86_BUILTIN_VPCOMTRUEUB,
24230 IX86_BUILTIN_VPCOMEQUW,
24231 IX86_BUILTIN_VPCOMNEUW,
24232 IX86_BUILTIN_VPCOMLTUW,
24233 IX86_BUILTIN_VPCOMLEUW,
24234 IX86_BUILTIN_VPCOMGTUW,
24235 IX86_BUILTIN_VPCOMGEUW,
24236 IX86_BUILTIN_VPCOMFALSEUW,
24237 IX86_BUILTIN_VPCOMTRUEUW,
24239 IX86_BUILTIN_VPCOMEQUD,
24240 IX86_BUILTIN_VPCOMNEUD,
24241 IX86_BUILTIN_VPCOMLTUD,
24242 IX86_BUILTIN_VPCOMLEUD,
24243 IX86_BUILTIN_VPCOMGTUD,
24244 IX86_BUILTIN_VPCOMGEUD,
24245 IX86_BUILTIN_VPCOMFALSEUD,
24246 IX86_BUILTIN_VPCOMTRUEUD,
24248 IX86_BUILTIN_VPCOMEQUQ,
24249 IX86_BUILTIN_VPCOMNEUQ,
24250 IX86_BUILTIN_VPCOMLTUQ,
24251 IX86_BUILTIN_VPCOMLEUQ,
24252 IX86_BUILTIN_VPCOMGTUQ,
24253 IX86_BUILTIN_VPCOMGEUQ,
24254 IX86_BUILTIN_VPCOMFALSEUQ,
24255 IX86_BUILTIN_VPCOMTRUEUQ,
24257 IX86_BUILTIN_VPCOMEQB,
24258 IX86_BUILTIN_VPCOMNEB,
24259 IX86_BUILTIN_VPCOMLTB,
24260 IX86_BUILTIN_VPCOMLEB,
24261 IX86_BUILTIN_VPCOMGTB,
24262 IX86_BUILTIN_VPCOMGEB,
24263 IX86_BUILTIN_VPCOMFALSEB,
24264 IX86_BUILTIN_VPCOMTRUEB,
24266 IX86_BUILTIN_VPCOMEQW,
24267 IX86_BUILTIN_VPCOMNEW,
24268 IX86_BUILTIN_VPCOMLTW,
24269 IX86_BUILTIN_VPCOMLEW,
24270 IX86_BUILTIN_VPCOMGTW,
24271 IX86_BUILTIN_VPCOMGEW,
24272 IX86_BUILTIN_VPCOMFALSEW,
24273 IX86_BUILTIN_VPCOMTRUEW,
24275 IX86_BUILTIN_VPCOMEQD,
24276 IX86_BUILTIN_VPCOMNED,
24277 IX86_BUILTIN_VPCOMLTD,
24278 IX86_BUILTIN_VPCOMLED,
24279 IX86_BUILTIN_VPCOMGTD,
24280 IX86_BUILTIN_VPCOMGED,
24281 IX86_BUILTIN_VPCOMFALSED,
24282 IX86_BUILTIN_VPCOMTRUED,
24284 IX86_BUILTIN_VPCOMEQQ,
24285 IX86_BUILTIN_VPCOMNEQ,
24286 IX86_BUILTIN_VPCOMLTQ,
24287 IX86_BUILTIN_VPCOMLEQ,
24288 IX86_BUILTIN_VPCOMGTQ,
24289 IX86_BUILTIN_VPCOMGEQ,
24290 IX86_BUILTIN_VPCOMFALSEQ,
24291 IX86_BUILTIN_VPCOMTRUEQ,
24293 /* LWP instructions. */
24294 IX86_BUILTIN_LLWPCB,
24295 IX86_BUILTIN_SLWPCB,
24296 IX86_BUILTIN_LWPVAL32,
24297 IX86_BUILTIN_LWPVAL64,
24298 IX86_BUILTIN_LWPINS32,
24299 IX86_BUILTIN_LWPINS64,
24301 IX86_BUILTIN_CLZS,
24303 /* BMI instructions. */
24304 IX86_BUILTIN_BEXTR32,
24305 IX86_BUILTIN_BEXTR64,
24306 IX86_BUILTIN_CTZS,
24308 /* TBM instructions. */
24309 IX86_BUILTIN_BEXTRI32,
24310 IX86_BUILTIN_BEXTRI64,
24313 /* FSGSBASE instructions. */
24314 IX86_BUILTIN_RDFSBASE32,
24315 IX86_BUILTIN_RDFSBASE64,
24316 IX86_BUILTIN_RDGSBASE32,
24317 IX86_BUILTIN_RDGSBASE64,
24318 IX86_BUILTIN_WRFSBASE32,
24319 IX86_BUILTIN_WRFSBASE64,
24320 IX86_BUILTIN_WRGSBASE32,
24321 IX86_BUILTIN_WRGSBASE64,
24323 /* RDRND instructions. */
24324 IX86_BUILTIN_RDRAND16_STEP,
24325 IX86_BUILTIN_RDRAND32_STEP,
24326 IX86_BUILTIN_RDRAND64_STEP,
24328 /* F16C instructions. */
24329 IX86_BUILTIN_CVTPH2PS,
24330 IX86_BUILTIN_CVTPH2PS256,
24331 IX86_BUILTIN_CVTPS2PH,
24332 IX86_BUILTIN_CVTPS2PH256,
24334 IX86_BUILTIN_MAX
24337 /* Table for the ix86 builtin decls. */
24338 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
24340 /* Table of all the builtin functions that are possible with different ISAs
24341 but are waiting to be built until a function is declared to use that
24342 ISA. */
24343 struct builtin_isa {
24344 const char *name; /* function name */
24345 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
24346 int isa; /* isa_flags this builtin is defined for */
24347 bool const_p; /* true if the declaration is constant */
24348 bool set_and_not_built_p;
24351 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
24354 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
24355 of which isa_flags to use in the ix86_builtins_isa array. Stores the
24356 function decl in the ix86_builtins array. Returns the function decl or
24357 NULL_TREE if the builtin was not added.
24359 If the front end has a special hook for builtin functions, delay adding
24360 builtin functions that aren't in the current ISA until the ISA is changed
24361 with function-specific optimization. Doing so can save about 300K for the
24362 default compiler. When the builtin is expanded, check at that time whether
24363 it is valid.
24365 If the front end doesn't have a special hook, record all builtins, even those
24366 that aren't in the current ISA, in case the user uses function-specific
24367 options for a different ISA, so that we don't get scope errors if a builtin
24368 is added in the middle of a function scope. */
24370 static inline tree
24371 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
24372 enum ix86_builtins code)
24374 tree decl = NULL_TREE;
24376 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
24378 ix86_builtins_isa[(int) code].isa = mask;
24380 mask &= ~OPTION_MASK_ISA_64BIT;
24381 if (mask == 0
24382 || (mask & ix86_isa_flags) != 0
24383 || (lang_hooks.builtin_function
24384 == lang_hooks.builtin_function_ext_scope))
24387 tree type = ix86_get_builtin_func_type (tcode);
24388 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
24389 NULL, NULL_TREE);
24390 ix86_builtins[(int) code] = decl;
24391 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
24393 else
24395 ix86_builtins[(int) code] = NULL_TREE;
24396 ix86_builtins_isa[(int) code].tcode = tcode;
24397 ix86_builtins_isa[(int) code].name = name;
24398 ix86_builtins_isa[(int) code].const_p = false;
24399 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
24403 return decl;
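/* Illustrative example (reflecting the typical usage later in this file):
   the builtin tables are walked and each builtin is registered with a call
   of the form

       def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
                    VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);

   When SSE is not enabled and the front end supports extension-scope
   builtins, only the ix86_builtins_isa entry is recorded; the decl itself
   is built later by ix86_add_new_builtins.  */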
24406 /* Like def_builtin, but also marks the function decl "const". */
24408 static inline tree
24409 def_builtin_const (int mask, const char *name,
24410 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
24412 tree decl = def_builtin (mask, name, tcode, code);
24413 if (decl)
24414 TREE_READONLY (decl) = 1;
24415 else
24416 ix86_builtins_isa[(int) code].const_p = true;
24418 return decl;
24421 /* Add any new builtin functions for a given ISA that may not have been
24422 declared. This saves a bit of space compared to adding all of the
24423 declarations to the tree up front, even when they end up unused. */
24425 static void
24426 ix86_add_new_builtins (int isa)
24428 int i;
24430 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
24432 if ((ix86_builtins_isa[i].isa & isa) != 0
24433 && ix86_builtins_isa[i].set_and_not_built_p)
24435 tree decl, type;
24437 /* Don't define the builtin again. */
24438 ix86_builtins_isa[i].set_and_not_built_p = false;
24440 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
24441 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
24442 type, i, BUILT_IN_MD, NULL,
24443 NULL_TREE);
24445 ix86_builtins[i] = decl;
24446 if (ix86_builtins_isa[i].const_p)
24447 TREE_READONLY (decl) = 1;
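/* Illustrative example (not part of the compiler): this deferred
   registration is what lets a builtin be used inside a function that
   enables its ISA with a target attribute even though the file is not
   compiled with that ISA, e.g.

       __attribute__ ((target ("sse4.2")))
       unsigned int
       crc8 (unsigned int c, unsigned char v)
       {
         return __builtin_ia32_crc32qi (c, v);
       }

   built without -msse4.2 on the command line.  */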
24452 /* Bits for builtin_description.flag. */
24454 /* Set when we don't support the comparison natively, and should
24455 swap the comparison operands in order to support it. */
24456 #define BUILTIN_DESC_SWAP_OPERANDS 1
24458 struct builtin_description
24460 const unsigned int mask;
24461 const enum insn_code icode;
24462 const char *const name;
24463 const enum ix86_builtins code;
24464 const enum rtx_code comparison;
24465 const int flag;
24468 static const struct builtin_description bdesc_comi[] =
24470 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
24471 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
24472 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
24473 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
24474 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
24475 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
24476 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
24477 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
24478 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
24479 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
24480 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
24481 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
24482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
24483 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
24484 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
24485 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
24486 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
24487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
24488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
24489 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
24490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
24491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
24492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
24493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
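/* Illustrative note (not part of the compiler): each row above becomes one
   comparison builtin.  The first entry, for instance, registers

       int __builtin_ia32_comieq (__v4sf, __v4sf);

   which <xmmintrin.h> wraps as _mm_comieq_ss.  The rtx_code column (UNEQ,
   UNLT, ...) selects the condition tested on the comi/ucomi flags result
   when the builtin is expanded.  */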
24496 static const struct builtin_description bdesc_pcmpestr[] =
24498 /* SSE4.2 */
24499 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
24500 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
24501 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
24502 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
24503 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
24504 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
24505 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
24508 static const struct builtin_description bdesc_pcmpistr[] =
24510 /* SSE4.2 */
24511 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
24512 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
24513 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
24514 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
24515 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
24516 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
24517 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
24520 /* Special builtins with a variable number of arguments. */
24521 static const struct builtin_description bdesc_special_args[] =
24523 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
24524 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
24526 /* MMX */
24527 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
24529 /* 3DNow! */
24530 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
24532 /* SSE */
24533 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24534 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24535 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
24537 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
24538 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
24539 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
24540 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
24542 /* SSE or 3DNow!A */
24543 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24544 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
24546 /* SSE2 */
24547 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
24551 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24552 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
24553 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
24554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
24555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
24557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
24558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
24560 /* SSE3 */
24561 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
24563 /* SSE4.1 */
24564 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
24566 /* SSE4A */
24567 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24568 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24570 /* AVX */
24571 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
24572 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
24574 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
24575 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
24576 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
24577 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
24578 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
24580 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
24581 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
24582 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
24583 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
24584 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
24585 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
24586 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
24588 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
24589 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
24590 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
24592 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
24593 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
24594 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
24595 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
24596 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
24597 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
24598 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
24599 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
24601 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
24602 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
24603 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
24604 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
24605 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
24606 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
24608 /* FSGSBASE */
24609 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
24610 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
24611 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
24612 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
24613 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
24614 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
24615 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
24616 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
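/* Illustrative note (not part of the compiler): in these tables the flag
   field carries the function type code instead of a comparison, e.g.
   (int) VOID_FTYPE_PFLOAT_V4SF above describes

       void __builtin_ia32_storeups (float *, __v4sf);

   (the builtin behind _mm_storeu_ps), and the expander uses that code to
   decide how the arguments and return value are handled.  */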
24619 /* Builtins with a variable number of arguments. */
24620 static const struct builtin_description bdesc_args[] =
24622 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
24623 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
24624 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
24625 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24626 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24627 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24628 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24630 /* MMX */
24631 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24632 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24633 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24634 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24635 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24636 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24638 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24639 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24640 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24641 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24642 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24643 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24644 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24645 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24647 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24648 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24650 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24651 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24652 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24653 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24655 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24656 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24657 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24658 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24659 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24660 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24662 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24663 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24664 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24665 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24666 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24667 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24669 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24670 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
24671 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24673 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
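  /* Shift builtins follow.  A sketch of the convention, inferred from the
     operand modes in these entries: the *_COUNT prototypes mark the last
     argument as a shift count; the ...wi/...di/...qi forms take it as an
     SImode scalar, while the unsuffixed forms take it in an MMX register
     (V4HI/V2SI/V1DI).  */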
24675 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24676 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24677 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24678 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24679 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24680 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24682 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24683 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24684 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24685 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24686 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24687 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24689 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24690 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24691 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24692 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24694 /* 3DNow! */
24695 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24696 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24697 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24698 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24700 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24701 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24702 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24703 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24704 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24705 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24706 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24707 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24708 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24709 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24710 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24711 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24712 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24713 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24714 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24716 /* 3DNow!A */
24717 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24718 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24719 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
24720 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24721 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24722 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24724 /* SSE */
24725 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
24726 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24727 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24728 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24729 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24730 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24731 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24732 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24733 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24734 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24735 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24736 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24738 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24740 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24741 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24742 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24743 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24744 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24745 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24746 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24747 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
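  /* Compare builtins follow.  The fifth field is the RTX comparison code
     handed to the maskcmp pattern (UNKNOWN elsewhere in this table means no
     comparison is involved); the *_SWAP prototypes appear to swap the two
     vector operands before expansion, which is how cmpgt/cmpge are expressed
     below in terms of LT/LE.  */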
24749 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24750 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24751 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24752 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24753 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24754 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24755 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24756 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24757 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24758 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24759 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24760 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24761 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24762 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24763 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24764 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24765 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24766 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24767 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24768 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24769 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24770 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24772 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24773 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24774 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24775 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24777 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24778 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24779 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24780 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24782 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24784 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24785 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24786 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24787 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24788 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24790 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
24791 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
24792 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
24794 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
24796 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24797 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24798 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24800 /* SSE MMX or 3DNow!A */
24801 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24802 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24803 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24805 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24806 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24807 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24808 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24810 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
24811 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
24813 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
24815 /* SSE2 */
24816 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
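  /* The vec_perm builtins below carry CODE_FOR_nothing: there is no single
     named insn pattern behind them, so they are presumably expanded through
     the generic vector-permute machinery rather than a direct insn.  */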
24818 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
24819 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
24820 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
24821 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
24822 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
24823 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
24824 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
24825 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
24826 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
24827 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
24828 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
24829 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
24831 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
24832 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
24833 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
24834 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
24835 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24836 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24838 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24839 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24840 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
24841 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24842 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24844 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
24846 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24847 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24848 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24849 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24851 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24852 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
24853 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24855 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24856 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24857 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24858 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24859 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24860 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24861 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24862 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24864 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24865 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24866 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24867 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24868 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24869 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24870 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24871 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24872 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24873 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24874 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24875 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24876 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24877 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24878 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24879 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24880 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24881 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24882 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24883 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24885 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24886 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24887 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24888 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24890 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24891 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24892 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24893 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24895 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24897 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24898 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24899 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24901 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
24903 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24904 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24905 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24906 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24907 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24908 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24909 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24910 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24912 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24913 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24914 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24915 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24916 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24917 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24918 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24919 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24921 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24922 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24924 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24925 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24926 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24927 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24929 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24930 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24932 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24933 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24934 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24935 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24936 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24937 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24939 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24940 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24941 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24942 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24944 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24945 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24946 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24947 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24948 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24949 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24950 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24951 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24953 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
24954 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
24955 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
24957 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24958 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
24960 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
24961 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
24963 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
24965 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
24966 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
24967 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
24968 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
24970 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
24971 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24972 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
24973 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
24974 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
24975 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
24976 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
24978 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
24979 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24980 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
24981 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
24982 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
24983 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
24984 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
24986 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24987 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
24988 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
24989 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
24991 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
24992 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
24993 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
24995 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
24997 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
24998 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
25000 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
25002 /* SSE2 MMX */
25003 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
25004 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
25006 /* SSE3 */
25007 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25008 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25010 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25011 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25012 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25013 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25014 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25015 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25017 /* SSSE3 */
25018 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
25019 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
25020 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
25021 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
25022 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
25023 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
25025 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25026 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25027 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25028 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25029 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25030 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25031 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25032 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25033 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25034 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25035 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25036 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25037 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
25038 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
25039 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25040 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25041 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25042 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25043 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25044 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25045 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25046 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25047 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25048 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25050 /* SSSE3. */
25051 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
25052 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
25054 /* SSE4.1 */
25055 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25056 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25057 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
25058 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
25059 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25060 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25061 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25062 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
25063 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
25064 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
25066 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
25067 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
25068 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
25069 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
25070 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
25071 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
25072 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
25073 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
25074 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
25075 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
25076 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
25077 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
25078 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
25080 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
25081 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25082 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25083 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25084 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25085 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25086 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25087 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25088 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25089 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25090 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
25091 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25093 /* SSE4.1 */
25094 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
25095 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
25096 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25097 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
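  /* PTEST builtins.  The EQ/LTU/GTU codes below match the conditions tested
     after PTEST -- ZF set (ptestz), CF set (ptestc), and both flags clear
     (ptestnzc) -- as suggested by the builtin names.  */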
25099 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
25100 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
25101 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
25103 /* SSE4.2 */
25104 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25105 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
25106 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
25107 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25108 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25110 /* SSE4A */
25111 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
25112 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
25113 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
25114 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25116 /* AES */
25117 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
25118 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
25120 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25121 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25122 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25123 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25125 /* PCLMUL */
25126 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
25128 /* AVX */
25129 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25130 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25131 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25132 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25133 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25134 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25135 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25136 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25137 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25138 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25139 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25140 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25141 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25142 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25143 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25144 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25145 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25146 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25147 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25148 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25149 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25150 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25151 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25152 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25153 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25154 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25156 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
25157 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
25158 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
25159 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
25161 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25162 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25163 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
25164 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
25165 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25166 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25167 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25168 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25169 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25170 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25171 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25172 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25173 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25174 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
25175 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
25176 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
25177 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
25178 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
25179 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
25180 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
25181 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
25182 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
25183 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
25184 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
25185 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25186 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25187 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
25188 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
25189 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
25190 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
25191 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
25192 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
25193 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
25194 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
25196 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25197 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25198 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
25200 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
25201 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25202 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25203 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25204 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25206 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25208 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
25209 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
25211 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25212 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25213 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25214 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25216 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
25217 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
25218 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
25219 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
25220 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
25221 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
25223 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
25224 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
25225 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
25226 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
25227 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
25228 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
25229 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
25230 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
25231 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
25232 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
25233 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
25234 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
25235 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
25236 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
25237 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
25239 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
25240 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
25242 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25243 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25245 { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
25247 /* BMI */
25248 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25249 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25250 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
25252 /* TBM */
25253 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25254 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25256 /* F16C */
25257 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
25258 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
25259 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
25260 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
25263 /* FMA4 and XOP. */
25264 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
25265 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
25266 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
25267 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
25268 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
25269 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
25270 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
25271 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
25272 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
25273 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
25274 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
25275 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
25276 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
25277 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
25278 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
25279 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
25280 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
25281 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
25282 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
25283 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
25284 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
25285 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
25286 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
25287 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
25288 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
25289 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
25290 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
25291 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
25292 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
25293 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
25294 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
25295 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
25296 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
25297 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
25298 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
25299 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
25300 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
25301 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
25302 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
25303 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
25304 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
25305 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
25306 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
25307 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
25308 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
25309 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
25310 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
25311 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
25312 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
25313 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
25314 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
25315 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
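/* For illustration (a rough sketch of the naming scheme, not part of the
   table below): each MULTI_ARG_* macro above is only a shorthand for one of
   the ix86_builtin_func_type codes.  So a table entry such as the first one
   in bdesc_multi_arg,

     { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
       "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
       UNKNOWN, (int)MULTI_ARG_3_SF },

   registers a builtin whose prototype is V4SF (V4SF, V4SF, V4SF), i.e.
   roughly "__m128 __builtin_ia32_vfmaddss (__m128, __m128, __m128)" at the
   user level.  */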
25317 static const struct builtin_description bdesc_multi_arg[] =
25319 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
25320 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
25321 UNKNOWN, (int)MULTI_ARG_3_SF },
25322 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
25323 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
25324 UNKNOWN, (int)MULTI_ARG_3_DF },
25326 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
25327 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
25328 UNKNOWN, (int)MULTI_ARG_3_SF },
25329 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
25330 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
25331 UNKNOWN, (int)MULTI_ARG_3_DF },
25332 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
25333 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
25334 UNKNOWN, (int)MULTI_ARG_3_SF2 },
25335 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
25336 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
25337 UNKNOWN, (int)MULTI_ARG_3_DF2 },
25339 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
25340 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
25341 UNKNOWN, (int)MULTI_ARG_3_SF },
25342 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
25343 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
25344 UNKNOWN, (int)MULTI_ARG_3_DF },
25345 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
25346 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
25347 UNKNOWN, (int)MULTI_ARG_3_SF2 },
25348 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
25349 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
25350 UNKNOWN, (int)MULTI_ARG_3_DF2 },
25352 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
25353 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
25354 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
25355 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
25356 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
25357 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
25358 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
25360 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
25361 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
25362 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
25363 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
25364 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
25365 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
25366 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
25368 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
25370 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
25371 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
25372 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25373 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25374 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
25375 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
25376 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25377 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25378 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25379 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25380 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25381 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25383 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
25384 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
25385 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
25386 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
25387 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
25388 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
25389 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
25390 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
25391 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
25392 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
25393 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
25394 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
25395 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
25396 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
25397 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
25398 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
25400 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
25401 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
25402 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
25403 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
25404 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
25405 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
25407 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
25408 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
25409 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
25410 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
25411 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
25412 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25413 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
25414 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
25415 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
25416 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
25417 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
25418 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25419 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
25420 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
25421 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25423 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
25424 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
25425 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
25426 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
25427 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
25428 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
25429 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
25431 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
25432 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
25433 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
25434 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
25435 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
25436 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
25437 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
25439 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
25440 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25441 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25442 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
25443 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
25444 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
25445 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
25447 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25448 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25449 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25450 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
25451 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
25452 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
25453 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
25455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
25456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25457 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
25459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
25460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
25461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
25463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
25464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
25467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
25468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
25469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
25471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
25472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
25475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
25476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
25477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
25479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
25483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
25484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
25485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
25487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
25506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
25507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
25508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
25512 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
25513 not in the current target ISA, so that the user can compile particular
25514 modules with target-specific options that differ from the command-line
25515 options. */
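/* For illustration, a rough sketch of what this enables (the helper name and
   typedef here are just examples): with, say, plain -msse2 on the command
   line, a single function can still use an AVX builtin registered below by
   carrying its own target options, along the lines of

     typedef float v8sf __attribute__ ((vector_size (32)));

     __attribute__ ((target ("avx")))
     static v8sf
     add8f (v8sf a, v8sf b)
     {
       return __builtin_ia32_addps256 (a, b);
     }
*/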
25516 static void
25517 ix86_init_mmx_sse_builtins (void)
25519 const struct builtin_description * d;
25520 enum ix86_builtin_func_type ftype;
25521 size_t i;
25523 /* Add all special builtins with variable number of operands. */
25524 for (i = 0, d = bdesc_special_args;
25525 i < ARRAY_SIZE (bdesc_special_args);
25526 i++, d++)
25528 if (d->name == 0)
25529 continue;
25531 ftype = (enum ix86_builtin_func_type) d->flag;
25532 def_builtin (d->mask, d->name, ftype, d->code);
25535 /* Add all builtins with variable number of operands. */
25536 for (i = 0, d = bdesc_args;
25537 i < ARRAY_SIZE (bdesc_args);
25538 i++, d++)
25540 if (d->name == 0)
25541 continue;
25543 ftype = (enum ix86_builtin_func_type) d->flag;
25544 def_builtin_const (d->mask, d->name, ftype, d->code);
25547 /* pcmpestr[im] insns. */
25548 for (i = 0, d = bdesc_pcmpestr;
25549 i < ARRAY_SIZE (bdesc_pcmpestr);
25550 i++, d++)
25552 if (d->code == IX86_BUILTIN_PCMPESTRM128)
25553 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
25554 else
25555 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
25556 def_builtin_const (d->mask, d->name, ftype, d->code);
25559 /* pcmpistr[im] insns. */
25560 for (i = 0, d = bdesc_pcmpistr;
25561 i < ARRAY_SIZE (bdesc_pcmpistr);
25562 i++, d++)
25564 if (d->code == IX86_BUILTIN_PCMPISTRM128)
25565 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
25566 else
25567 ftype = INT_FTYPE_V16QI_V16QI_INT;
25568 def_builtin_const (d->mask, d->name, ftype, d->code);
25571 /* comi/ucomi insns. */
25572 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25574 if (d->mask == OPTION_MASK_ISA_SSE2)
25575 ftype = INT_FTYPE_V2DF_V2DF;
25576 else
25577 ftype = INT_FTYPE_V4SF_V4SF;
25578 def_builtin_const (d->mask, d->name, ftype, d->code);
25581 /* SSE */
25582 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
25583 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
25584 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
25585 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
25587 /* SSE or 3DNow!A */
25588 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25589 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
25590 IX86_BUILTIN_MASKMOVQ);
25592 /* SSE2 */
25593 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
25594 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
25596 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
25597 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
25598 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
25599 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
25601 /* SSE3. */
25602 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
25603 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
25604 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
25605 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
25607 /* AES */
25608 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
25609 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
25610 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
25611 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
25612 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
25613 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
25614 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
25615 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
25616 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
25617 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
25618 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
25619 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
25621 /* PCLMUL */
25622 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
25623 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
25625 /* RDRND */
25626 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
25627 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
25628 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
25629 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
25630 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
25631 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
25632 IX86_BUILTIN_RDRAND64_STEP);
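/* For illustration: the *_step builtins return the RDRAND carry flag, i.e.
   nonzero only when a random value was actually stored through the pointer
   argument, so a typical (sketched) retry loop looks like

     static unsigned int
     get_random32 (void)
     {
       unsigned int val;
       while (!__builtin_ia32_rdrand32_step (&val))
         ;
       return val;
     }

   RDRAND can transiently fail, hence the retry.  The _rdrand32_step intrinsic
   in immintrin.h is a thin wrapper around this builtin.  */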
25634 /* MMX access to the vec_init patterns. */
25635 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
25636 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
25638 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
25639 V4HI_FTYPE_HI_HI_HI_HI,
25640 IX86_BUILTIN_VEC_INIT_V4HI);
25642 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
25643 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
25644 IX86_BUILTIN_VEC_INIT_V8QI);
25646 /* Access to the vec_extract patterns. */
25647 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
25648 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
25649 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
25650 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
25651 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
25652 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
25653 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
25654 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
25655 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
25656 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
25658 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25659 "__builtin_ia32_vec_ext_v4hi",
25660 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
25662 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
25663 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
25665 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
25666 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
25668 /* Access to the vec_set patterns. */
25669 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
25670 "__builtin_ia32_vec_set_v2di",
25671 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
25673 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
25674 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
25676 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
25677 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
25679 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
25680 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
25682 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25683 "__builtin_ia32_vec_set_v4hi",
25684 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
25686 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
25687 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
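/* For illustration: the vec_init/vec_ext/vec_set builtins above are low-level
   element accessors; the index operand must be a compile-time constant.  A
   rough user-level sketch (helper names are just examples):

     typedef float v4sf __attribute__ ((vector_size (16)));

     static float
     first_lane (v4sf v)
     {
       return __builtin_ia32_vec_ext_v4sf (v, 0);
     }

     static v4sf
     with_first_lane (v4sf v, float x)
     {
       return __builtin_ia32_vec_set_v4sf (v, x, 0);
     }

   Note that vec_set_v4sf is only registered for SSE4.1, per the entry
   above.  */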
25689 /* Add FMA4/XOP multi-arg builtin instructions. */
25690 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25692 if (d->name == 0)
25693 continue;
25695 ftype = (enum ix86_builtin_func_type) d->flag;
25696 def_builtin_const (d->mask, d->name, ftype, d->code);
25700 /* Internal method for ix86_init_builtins. */
25702 static void
25703 ix86_init_builtins_va_builtins_abi (void)
25705 tree ms_va_ref, sysv_va_ref;
25706 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
25707 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
25708 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
25709 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
25711 if (!TARGET_64BIT)
25712 return;
25713 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
25714 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
25715 ms_va_ref = build_reference_type (ms_va_list_type_node);
25716 sysv_va_ref =
25717 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
25719 fnvoid_va_end_ms =
25720 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25721 fnvoid_va_start_ms =
25722 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25723 fnvoid_va_end_sysv =
25724 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
25725 fnvoid_va_start_sysv =
25726 build_varargs_function_type_list (void_type_node, sysv_va_ref,
25727 NULL_TREE);
25728 fnvoid_va_copy_ms =
25729 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
25730 NULL_TREE);
25731 fnvoid_va_copy_sysv =
25732 build_function_type_list (void_type_node, sysv_va_ref,
25733 sysv_va_ref, NULL_TREE);
25735 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
25736 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
25737 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
25738 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
25739 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
25740 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
25741 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
25742 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25743 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
25744 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25745 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
25746 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
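/* For illustration, a rough sketch of how these are used (the function here
   is only an example): a 64-bit translation unit compiled for the SysV ABI
   can still implement an ms_abi varargs routine with the __builtin_ms_va_*
   forms registered above:

     static int __attribute__ ((ms_abi))
     sum_ints (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;

       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/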
25749 static void
25750 ix86_init_builtin_types (void)
25752 tree float128_type_node, float80_type_node;
25754 /* The __float80 type. */
25755 float80_type_node = long_double_type_node;
25756 if (TYPE_MODE (float80_type_node) != XFmode)
25758 /* long double is not XFmode here; build a separate 80-bit type for __float80. */
25759 float80_type_node = make_node (REAL_TYPE);
25761 TYPE_PRECISION (float80_type_node) = 80;
25762 layout_type (float80_type_node);
25764 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
25766 /* The __float128 type. */
25767 float128_type_node = make_node (REAL_TYPE);
25768 TYPE_PRECISION (float128_type_node) = 128;
25769 layout_type (float128_type_node);
25770 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
25772 /* This macro is built by i386-builtin-types.awk. */
25773 DEFINE_BUILTIN_PRIMITIVE_TYPES;
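/* For illustration: once registered, the two type names are directly usable
   in C on x86 targets, roughly as

     __float80  ext  = 1.0L;
     __float128 quad = 1.0;

   where __float80 is the 80-bit x87 extended type (XFmode, usually the same
   as long double) and __float128 is the 128-bit quad type (TFmode).  */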
25776 static void
25777 ix86_init_builtins (void)
25779 tree t;
25781 ix86_init_builtin_types ();
25783 /* TFmode support builtins. */
25784 def_builtin_const (0, "__builtin_infq",
25785 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
25786 def_builtin_const (0, "__builtin_huge_valq",
25787 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
25789 /* We will expand them to a normal call if SSE2 isn't available, since
25790 they are used by libgcc. */
25791 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
25792 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
25793 BUILT_IN_MD, "__fabstf2", NULL_TREE);
25794 TREE_READONLY (t) = 1;
25795 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
25797 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
25798 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
25799 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
25800 TREE_READONLY (t) = 1;
25801 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
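/* For illustration: these are the __float128 helpers that libgcc relies on;
   a rough user-level sketch (the function name is just an example) is

     __float128
     copy_magnitude (__float128 x, __float128 sign)
     {
       return __builtin_copysignq (__builtin_fabsq (x), sign);
     }

   When SSE2 is unavailable they are not expanded inline but become calls to
   the library routines named above (__fabstf2, __copysigntf3).  */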
25803 ix86_init_mmx_sse_builtins ();
25805 if (TARGET_64BIT)
25806 ix86_init_builtins_va_builtins_abi ();
25808 #ifdef SUBTARGET_INIT_BUILTINS
25809 SUBTARGET_INIT_BUILTINS;
25810 #endif
25813 /* Return the ix86 builtin for CODE. */
25815 static tree
25816 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25818 if (code >= IX86_BUILTIN_MAX)
25819 return error_mark_node;
25821 return ix86_builtins[code];
25824 /* Errors in the source file can cause expand_expr to return const0_rtx
25825 where we expect a vector. To avoid crashing, use one of the vector
25826 clear instructions. */
25827 static rtx
25828 safe_vector_operand (rtx x, enum machine_mode mode)
25830 if (x == const0_rtx)
25831 x = CONST0_RTX (mode);
25832 return x;
25835 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
25837 static rtx
25838 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
25840 rtx pat;
25841 tree arg0 = CALL_EXPR_ARG (exp, 0);
25842 tree arg1 = CALL_EXPR_ARG (exp, 1);
25843 rtx op0 = expand_normal (arg0);
25844 rtx op1 = expand_normal (arg1);
25845 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25846 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25847 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
25849 if (VECTOR_MODE_P (mode0))
25850 op0 = safe_vector_operand (op0, mode0);
25851 if (VECTOR_MODE_P (mode1))
25852 op1 = safe_vector_operand (op1, mode1);
25854 if (optimize || !target
25855 || GET_MODE (target) != tmode
25856 || !insn_data[icode].operand[0].predicate (target, tmode))
25857 target = gen_reg_rtx (tmode);
25859 if (GET_MODE (op1) == SImode && mode1 == TImode)
25861 rtx x = gen_reg_rtx (V4SImode);
25862 emit_insn (gen_sse2_loadd (x, op1));
25863 op1 = gen_lowpart (TImode, x);
25866 if (!insn_data[icode].operand[1].predicate (op0, mode0))
25867 op0 = copy_to_mode_reg (mode0, op0);
25868 if (!insn_data[icode].operand[2].predicate (op1, mode1))
25869 op1 = copy_to_mode_reg (mode1, op1);
25871 pat = GEN_FCN (icode) (target, op0, op1);
25872 if (! pat)
25873 return 0;
25875 emit_insn (pat);
25877 return target;
25880 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
25882 static rtx
25883 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
25884 enum ix86_builtin_func_type m_type,
25885 enum rtx_code sub_code)
25887 rtx pat;
25888 int i;
25889 int nargs;
25890 bool comparison_p = false;
25891 bool tf_p = false;
25892 bool last_arg_constant = false;
25893 int num_memory = 0;
25894 struct {
25895 rtx op;
25896 enum machine_mode mode;
25897 } args[4];
25899 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25901 switch (m_type)
25903 case MULTI_ARG_4_DF2_DI_I:
25904 case MULTI_ARG_4_DF2_DI_I1:
25905 case MULTI_ARG_4_SF2_SI_I:
25906 case MULTI_ARG_4_SF2_SI_I1:
25907 nargs = 4;
25908 last_arg_constant = true;
25909 break;
25911 case MULTI_ARG_3_SF:
25912 case MULTI_ARG_3_DF:
25913 case MULTI_ARG_3_SF2:
25914 case MULTI_ARG_3_DF2:
25915 case MULTI_ARG_3_DI:
25916 case MULTI_ARG_3_SI:
25917 case MULTI_ARG_3_SI_DI:
25918 case MULTI_ARG_3_HI:
25919 case MULTI_ARG_3_HI_SI:
25920 case MULTI_ARG_3_QI:
25921 case MULTI_ARG_3_DI2:
25922 case MULTI_ARG_3_SI2:
25923 case MULTI_ARG_3_HI2:
25924 case MULTI_ARG_3_QI2:
25925 nargs = 3;
25926 break;
25928 case MULTI_ARG_2_SF:
25929 case MULTI_ARG_2_DF:
25930 case MULTI_ARG_2_DI:
25931 case MULTI_ARG_2_SI:
25932 case MULTI_ARG_2_HI:
25933 case MULTI_ARG_2_QI:
25934 nargs = 2;
25935 break;
25937 case MULTI_ARG_2_DI_IMM:
25938 case MULTI_ARG_2_SI_IMM:
25939 case MULTI_ARG_2_HI_IMM:
25940 case MULTI_ARG_2_QI_IMM:
25941 nargs = 2;
25942 last_arg_constant = true;
25943 break;
25945 case MULTI_ARG_1_SF:
25946 case MULTI_ARG_1_DF:
25947 case MULTI_ARG_1_SF2:
25948 case MULTI_ARG_1_DF2:
25949 case MULTI_ARG_1_DI:
25950 case MULTI_ARG_1_SI:
25951 case MULTI_ARG_1_HI:
25952 case MULTI_ARG_1_QI:
25953 case MULTI_ARG_1_SI_DI:
25954 case MULTI_ARG_1_HI_DI:
25955 case MULTI_ARG_1_HI_SI:
25956 case MULTI_ARG_1_QI_DI:
25957 case MULTI_ARG_1_QI_SI:
25958 case MULTI_ARG_1_QI_HI:
25959 nargs = 1;
25960 break;
25962 case MULTI_ARG_2_DI_CMP:
25963 case MULTI_ARG_2_SI_CMP:
25964 case MULTI_ARG_2_HI_CMP:
25965 case MULTI_ARG_2_QI_CMP:
25966 nargs = 2;
25967 comparison_p = true;
25968 break;
25970 case MULTI_ARG_2_SF_TF:
25971 case MULTI_ARG_2_DF_TF:
25972 case MULTI_ARG_2_DI_TF:
25973 case MULTI_ARG_2_SI_TF:
25974 case MULTI_ARG_2_HI_TF:
25975 case MULTI_ARG_2_QI_TF:
25976 nargs = 2;
25977 tf_p = true;
25978 break;
25980 default:
25981 gcc_unreachable ();
25984 if (optimize || !target
25985 || GET_MODE (target) != tmode
25986 || !insn_data[icode].operand[0].predicate (target, tmode))
25987 target = gen_reg_rtx (tmode);
25989 gcc_assert (nargs <= 4);
25991 for (i = 0; i < nargs; i++)
25993 tree arg = CALL_EXPR_ARG (exp, i);
25994 rtx op = expand_normal (arg);
25995 int adjust = (comparison_p) ? 1 : 0;
25996 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
25998 if (last_arg_constant && i == nargs-1)
26000 if (!CONST_INT_P (op))
26002 error ("last argument must be an immediate");
26003 return gen_reg_rtx (tmode);
26006 else
26008 if (VECTOR_MODE_P (mode))
26009 op = safe_vector_operand (op, mode);
26011 /* If we aren't optimizing, only allow one memory operand to be
26012 generated. */
26013 if (memory_operand (op, mode))
26014 num_memory++;
26016 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
26018 if (optimize
26019 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
26020 || num_memory > 1)
26021 op = force_reg (mode, op);
26024 args[i].op = op;
26025 args[i].mode = mode;
26028 switch (nargs)
26030 case 1:
26031 pat = GEN_FCN (icode) (target, args[0].op);
26032 break;
26034 case 2:
26035 if (tf_p)
26036 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
26037 GEN_INT ((int)sub_code));
26038 else if (! comparison_p)
26039 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26040 else
26042 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
26043 args[0].op,
26044 args[1].op);
26046 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
26048 break;
26050 case 3:
26051 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26052 break;
26054 case 4:
26055 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
26056 break;
26058 default:
26059 gcc_unreachable ();
26062 if (! pat)
26063 return 0;
26065 emit_insn (pat);
26066 return target;
26069 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
26070 insns with vec_merge. */
26072 static rtx
26073 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
26074 rtx target)
26076 rtx pat;
26077 tree arg0 = CALL_EXPR_ARG (exp, 0);
26078 rtx op1, op0 = expand_normal (arg0);
26079 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26080 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
26082 if (optimize || !target
26083 || GET_MODE (target) != tmode
26084 || !insn_data[icode].operand[0].predicate (target, tmode))
26085 target = gen_reg_rtx (tmode);
26087 if (VECTOR_MODE_P (mode0))
26088 op0 = safe_vector_operand (op0, mode0);
26090 if ((optimize && !register_operand (op0, mode0))
26091 || !insn_data[icode].operand[1].predicate (op0, mode0))
26092 op0 = copy_to_mode_reg (mode0, op0);
26094 op1 = op0;
26095 if (!insn_data[icode].operand[2].predicate (op1, mode0))
26096 op1 = copy_to_mode_reg (mode0, op1);
26098 pat = GEN_FCN (icode) (target, op0, op1);
26099 if (! pat)
26100 return 0;
26101 emit_insn (pat);
26102 return target;
26105 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
26107 static rtx
26108 ix86_expand_sse_compare (const struct builtin_description *d,
26109 tree exp, rtx target, bool swap)
26111 rtx pat;
26112 tree arg0 = CALL_EXPR_ARG (exp, 0);
26113 tree arg1 = CALL_EXPR_ARG (exp, 1);
26114 rtx op0 = expand_normal (arg0);
26115 rtx op1 = expand_normal (arg1);
26116 rtx op2;
26117 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
26118 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
26119 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
26120 enum rtx_code comparison = d->comparison;
26122 if (VECTOR_MODE_P (mode0))
26123 op0 = safe_vector_operand (op0, mode0);
26124 if (VECTOR_MODE_P (mode1))
26125 op1 = safe_vector_operand (op1, mode1);
26127 /* Swap operands if we have a comparison that isn't available in
26128 hardware. */
26129 if (swap)
26131 rtx tmp = gen_reg_rtx (mode1);
26132 emit_move_insn (tmp, op1);
26133 op1 = op0;
26134 op0 = tmp;
26137 if (optimize || !target
26138 || GET_MODE (target) != tmode
26139 || !insn_data[d->icode].operand[0].predicate (target, tmode))
26140 target = gen_reg_rtx (tmode);
26142 if ((optimize && !register_operand (op0, mode0))
26143 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
26144 op0 = copy_to_mode_reg (mode0, op0);
26145 if ((optimize && !register_operand (op1, mode1))
26146 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
26147 op1 = copy_to_mode_reg (mode1, op1);
26149 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
26150 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
26151 if (! pat)
26152 return 0;
26153 emit_insn (pat);
26154 return target;
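/* Illustrative example (a sketch, assuming the usual <xmmintrin.h>
   wrappers): builtins described with a _SWAP function type have no direct
   hardware comparison, so the operands are exchanged above and the swapped
   comparison taken from d->comparison is emitted instead.  E.g. a
   greater-than compare such as

     __m128 m = _mm_cmpgt_ps (a, b);

   can be produced as a less-than compare of (b, a).  */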
26157 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
26159 static rtx
26160 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
26161 rtx target)
26163 rtx pat;
26164 tree arg0 = CALL_EXPR_ARG (exp, 0);
26165 tree arg1 = CALL_EXPR_ARG (exp, 1);
26166 rtx op0 = expand_normal (arg0);
26167 rtx op1 = expand_normal (arg1);
26168 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26169 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26170 enum rtx_code comparison = d->comparison;
26172 if (VECTOR_MODE_P (mode0))
26173 op0 = safe_vector_operand (op0, mode0);
26174 if (VECTOR_MODE_P (mode1))
26175 op1 = safe_vector_operand (op1, mode1);
26177 /* Swap operands if we have a comparison that isn't available in
26178 hardware. */
26179 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
26181 rtx tmp = op1;
26182 op1 = op0;
26183 op0 = tmp;
26186 target = gen_reg_rtx (SImode);
26187 emit_move_insn (target, const0_rtx);
26188 target = gen_rtx_SUBREG (QImode, target, 0);
26190 if ((optimize && !register_operand (op0, mode0))
26191 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26192 op0 = copy_to_mode_reg (mode0, op0);
26193 if ((optimize && !register_operand (op1, mode1))
26194 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26195 op1 = copy_to_mode_reg (mode1, op1);
26197 pat = GEN_FCN (d->icode) (op0, op1);
26198 if (! pat)
26199 return 0;
26200 emit_insn (pat);
26201 emit_insn (gen_rtx_SET (VOIDmode,
26202 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26203 gen_rtx_fmt_ee (comparison, QImode,
26204 SET_DEST (pat),
26205 const0_rtx)));
26207 return SUBREG_REG (target);
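/* Illustrative example (a sketch, assuming the usual <xmmintrin.h>
   wrappers): a scalar ordered compare such as

     int eq = _mm_comieq_ss (a, b);

   reaches this helper through its __builtin_ia32_comi* descriptor; the
   comi pattern only sets the flags, and the code above materializes the
   result by setting the low byte of a zeroed SImode pseudo from the flag
   named by d->comparison, which is what the SUBREG_REG return value
   hands back.  */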
26210 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
26212 static rtx
26213 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
26214 rtx target)
26216 rtx pat;
26217 tree arg0 = CALL_EXPR_ARG (exp, 0);
26218 tree arg1 = CALL_EXPR_ARG (exp, 1);
26219 rtx op0 = expand_normal (arg0);
26220 rtx op1 = expand_normal (arg1);
26221 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26222 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26223 enum rtx_code comparison = d->comparison;
26225 if (VECTOR_MODE_P (mode0))
26226 op0 = safe_vector_operand (op0, mode0);
26227 if (VECTOR_MODE_P (mode1))
26228 op1 = safe_vector_operand (op1, mode1);
26230 target = gen_reg_rtx (SImode);
26231 emit_move_insn (target, const0_rtx);
26232 target = gen_rtx_SUBREG (QImode, target, 0);
26234 if ((optimize && !register_operand (op0, mode0))
26235 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26236 op0 = copy_to_mode_reg (mode0, op0);
26237 if ((optimize && !register_operand (op1, mode1))
26238 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26239 op1 = copy_to_mode_reg (mode1, op1);
26241 pat = GEN_FCN (d->icode) (op0, op1);
26242 if (! pat)
26243 return 0;
26244 emit_insn (pat);
26245 emit_insn (gen_rtx_SET (VOIDmode,
26246 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26247 gen_rtx_fmt_ee (comparison, QImode,
26248 SET_DEST (pat),
26249 const0_rtx)));
26251 return SUBREG_REG (target);
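/* Illustrative example (a sketch, assuming the usual <smmintrin.h>
   wrappers): SSE4.1 test intrinsics such as

     int z = _mm_testz_si128 (a, b);

   come through the *_PTEST function types; ptest only sets flags, so the
   result is again built by zeroing an SImode pseudo and setting its low
   byte from the flag selected by d->comparison.  */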
26254 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
26256 static rtx
26257 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
26258 tree exp, rtx target)
26260 rtx pat;
26261 tree arg0 = CALL_EXPR_ARG (exp, 0);
26262 tree arg1 = CALL_EXPR_ARG (exp, 1);
26263 tree arg2 = CALL_EXPR_ARG (exp, 2);
26264 tree arg3 = CALL_EXPR_ARG (exp, 3);
26265 tree arg4 = CALL_EXPR_ARG (exp, 4);
26266 rtx scratch0, scratch1;
26267 rtx op0 = expand_normal (arg0);
26268 rtx op1 = expand_normal (arg1);
26269 rtx op2 = expand_normal (arg2);
26270 rtx op3 = expand_normal (arg3);
26271 rtx op4 = expand_normal (arg4);
26272 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
26274 tmode0 = insn_data[d->icode].operand[0].mode;
26275 tmode1 = insn_data[d->icode].operand[1].mode;
26276 modev2 = insn_data[d->icode].operand[2].mode;
26277 modei3 = insn_data[d->icode].operand[3].mode;
26278 modev4 = insn_data[d->icode].operand[4].mode;
26279 modei5 = insn_data[d->icode].operand[5].mode;
26280 modeimm = insn_data[d->icode].operand[6].mode;
26282 if (VECTOR_MODE_P (modev2))
26283 op0 = safe_vector_operand (op0, modev2);
26284 if (VECTOR_MODE_P (modev4))
26285 op2 = safe_vector_operand (op2, modev4);
26287 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26288 op0 = copy_to_mode_reg (modev2, op0);
26289 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
26290 op1 = copy_to_mode_reg (modei3, op1);
26291 if ((optimize && !register_operand (op2, modev4))
26292 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
26293 op2 = copy_to_mode_reg (modev4, op2);
26294 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
26295 op3 = copy_to_mode_reg (modei5, op3);
26297 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
26299 error ("the fifth argument must be a 8-bit immediate");
26300 return const0_rtx;
26303 if (d->code == IX86_BUILTIN_PCMPESTRI128)
26305 if (optimize || !target
26306 || GET_MODE (target) != tmode0
26307 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26308 target = gen_reg_rtx (tmode0);
26310 scratch1 = gen_reg_rtx (tmode1);
26312 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
26314 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
26316 if (optimize || !target
26317 || GET_MODE (target) != tmode1
26318 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26319 target = gen_reg_rtx (tmode1);
26321 scratch0 = gen_reg_rtx (tmode0);
26323 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
26325 else
26327 gcc_assert (d->flag);
26329 scratch0 = gen_reg_rtx (tmode0);
26330 scratch1 = gen_reg_rtx (tmode1);
26332 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
26335 if (! pat)
26336 return 0;
26338 emit_insn (pat);
26340 if (d->flag)
26342 target = gen_reg_rtx (SImode);
26343 emit_move_insn (target, const0_rtx);
26344 target = gen_rtx_SUBREG (QImode, target, 0);
26346 emit_insn
26347 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26348 gen_rtx_fmt_ee (EQ, QImode,
26349 gen_rtx_REG ((enum machine_mode) d->flag,
26350 FLAGS_REG),
26351 const0_rtx)));
26352 return SUBREG_REG (target);
26354 else
26355 return target;
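/* Illustrative example (a sketch, assuming the usual <smmintrin.h>
   wrappers): the explicit-length string compares take two vectors, two
   lengths and an 8-bit control immediate, e.g.

     int idx = _mm_cmpestri (a, la, b, lb, 0x08);

   IX86_BUILTIN_PCMPESTRI128 returns the index result, PCMPESTRM128 the
   mask result, and the flag-testing variants (d->flag nonzero) read the
   requested bit out of FLAGS_REG as shown above.  */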
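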
26359 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
26361 static rtx
26362 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
26363 tree exp, rtx target)
26365 rtx pat;
26366 tree arg0 = CALL_EXPR_ARG (exp, 0);
26367 tree arg1 = CALL_EXPR_ARG (exp, 1);
26368 tree arg2 = CALL_EXPR_ARG (exp, 2);
26369 rtx scratch0, scratch1;
26370 rtx op0 = expand_normal (arg0);
26371 rtx op1 = expand_normal (arg1);
26372 rtx op2 = expand_normal (arg2);
26373 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
26375 tmode0 = insn_data[d->icode].operand[0].mode;
26376 tmode1 = insn_data[d->icode].operand[1].mode;
26377 modev2 = insn_data[d->icode].operand[2].mode;
26378 modev3 = insn_data[d->icode].operand[3].mode;
26379 modeimm = insn_data[d->icode].operand[4].mode;
26381 if (VECTOR_MODE_P (modev2))
26382 op0 = safe_vector_operand (op0, modev2);
26383 if (VECTOR_MODE_P (modev3))
26384 op1 = safe_vector_operand (op1, modev3);
26386 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26387 op0 = copy_to_mode_reg (modev2, op0);
26388 if ((optimize && !register_operand (op1, modev3))
26389 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
26390 op1 = copy_to_mode_reg (modev3, op1);
26392 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
26394 error ("the third argument must be a 8-bit immediate");
26395 return const0_rtx;
26398 if (d->code == IX86_BUILTIN_PCMPISTRI128)
26400 if (optimize || !target
26401 || GET_MODE (target) != tmode0
26402 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26403 target = gen_reg_rtx (tmode0);
26405 scratch1 = gen_reg_rtx (tmode1);
26407 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
26409 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
26411 if (optimize || !target
26412 || GET_MODE (target) != tmode1
26413 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26414 target = gen_reg_rtx (tmode1);
26416 scratch0 = gen_reg_rtx (tmode0);
26418 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
26420 else
26422 gcc_assert (d->flag);
26424 scratch0 = gen_reg_rtx (tmode0);
26425 scratch1 = gen_reg_rtx (tmode1);
26427 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
26430 if (! pat)
26431 return 0;
26433 emit_insn (pat);
26435 if (d->flag)
26437 target = gen_reg_rtx (SImode);
26438 emit_move_insn (target, const0_rtx);
26439 target = gen_rtx_SUBREG (QImode, target, 0);
26441 emit_insn
26442 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26443 gen_rtx_fmt_ee (EQ, QImode,
26444 gen_rtx_REG ((enum machine_mode) d->flag,
26445 FLAGS_REG),
26446 const0_rtx)));
26447 return SUBREG_REG (target);
26449 else
26450 return target;
26453 /* Subroutine of ix86_expand_builtin to take care of insns with
26454 variable number of operands. */
26456 static rtx
26457 ix86_expand_args_builtin (const struct builtin_description *d,
26458 tree exp, rtx target)
26460 rtx pat, real_target;
26461 unsigned int i, nargs;
26462 unsigned int nargs_constant = 0;
26463 int num_memory = 0;
26464 struct
26466 rtx op;
26467 enum machine_mode mode;
26468 } args[4];
26469 bool last_arg_count = false;
26470 enum insn_code icode = d->icode;
26471 const struct insn_data_d *insn_p = &insn_data[icode];
26472 enum machine_mode tmode = insn_p->operand[0].mode;
26473 enum machine_mode rmode = VOIDmode;
26474 bool swap = false;
26475 enum rtx_code comparison = d->comparison;
26477 switch ((enum ix86_builtin_func_type) d->flag)
26479 case INT_FTYPE_V8SF_V8SF_PTEST:
26480 case INT_FTYPE_V4DI_V4DI_PTEST:
26481 case INT_FTYPE_V4DF_V4DF_PTEST:
26482 case INT_FTYPE_V4SF_V4SF_PTEST:
26483 case INT_FTYPE_V2DI_V2DI_PTEST:
26484 case INT_FTYPE_V2DF_V2DF_PTEST:
26485 return ix86_expand_sse_ptest (d, exp, target);
26486 case FLOAT128_FTYPE_FLOAT128:
26487 case FLOAT_FTYPE_FLOAT:
26488 case INT_FTYPE_INT:
26489 case UINT64_FTYPE_INT:
26490 case UINT16_FTYPE_UINT16:
26491 case INT64_FTYPE_INT64:
26492 case INT64_FTYPE_V4SF:
26493 case INT64_FTYPE_V2DF:
26494 case INT_FTYPE_V16QI:
26495 case INT_FTYPE_V8QI:
26496 case INT_FTYPE_V8SF:
26497 case INT_FTYPE_V4DF:
26498 case INT_FTYPE_V4SF:
26499 case INT_FTYPE_V2DF:
26500 case V16QI_FTYPE_V16QI:
26501 case V8SI_FTYPE_V8SF:
26502 case V8SI_FTYPE_V4SI:
26503 case V8HI_FTYPE_V8HI:
26504 case V8HI_FTYPE_V16QI:
26505 case V8QI_FTYPE_V8QI:
26506 case V8SF_FTYPE_V8SF:
26507 case V8SF_FTYPE_V8SI:
26508 case V8SF_FTYPE_V4SF:
26509 case V8SF_FTYPE_V8HI:
26510 case V4SI_FTYPE_V4SI:
26511 case V4SI_FTYPE_V16QI:
26512 case V4SI_FTYPE_V4SF:
26513 case V4SI_FTYPE_V8SI:
26514 case V4SI_FTYPE_V8HI:
26515 case V4SI_FTYPE_V4DF:
26516 case V4SI_FTYPE_V2DF:
26517 case V4HI_FTYPE_V4HI:
26518 case V4DF_FTYPE_V4DF:
26519 case V4DF_FTYPE_V4SI:
26520 case V4DF_FTYPE_V4SF:
26521 case V4DF_FTYPE_V2DF:
26522 case V4SF_FTYPE_V4SF:
26523 case V4SF_FTYPE_V4SI:
26524 case V4SF_FTYPE_V8SF:
26525 case V4SF_FTYPE_V4DF:
26526 case V4SF_FTYPE_V8HI:
26527 case V4SF_FTYPE_V2DF:
26528 case V2DI_FTYPE_V2DI:
26529 case V2DI_FTYPE_V16QI:
26530 case V2DI_FTYPE_V8HI:
26531 case V2DI_FTYPE_V4SI:
26532 case V2DF_FTYPE_V2DF:
26533 case V2DF_FTYPE_V4SI:
26534 case V2DF_FTYPE_V4DF:
26535 case V2DF_FTYPE_V4SF:
26536 case V2DF_FTYPE_V2SI:
26537 case V2SI_FTYPE_V2SI:
26538 case V2SI_FTYPE_V4SF:
26539 case V2SI_FTYPE_V2SF:
26540 case V2SI_FTYPE_V2DF:
26541 case V2SF_FTYPE_V2SF:
26542 case V2SF_FTYPE_V2SI:
26543 nargs = 1;
26544 break;
26545 case V4SF_FTYPE_V4SF_VEC_MERGE:
26546 case V2DF_FTYPE_V2DF_VEC_MERGE:
26547 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
26548 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
26549 case V16QI_FTYPE_V16QI_V16QI:
26550 case V16QI_FTYPE_V8HI_V8HI:
26551 case V8QI_FTYPE_V8QI_V8QI:
26552 case V8QI_FTYPE_V4HI_V4HI:
26553 case V8HI_FTYPE_V8HI_V8HI:
26554 case V8HI_FTYPE_V16QI_V16QI:
26555 case V8HI_FTYPE_V4SI_V4SI:
26556 case V8SF_FTYPE_V8SF_V8SF:
26557 case V8SF_FTYPE_V8SF_V8SI:
26558 case V4SI_FTYPE_V4SI_V4SI:
26559 case V4SI_FTYPE_V8HI_V8HI:
26560 case V4SI_FTYPE_V4SF_V4SF:
26561 case V4SI_FTYPE_V2DF_V2DF:
26562 case V4HI_FTYPE_V4HI_V4HI:
26563 case V4HI_FTYPE_V8QI_V8QI:
26564 case V4HI_FTYPE_V2SI_V2SI:
26565 case V4DF_FTYPE_V4DF_V4DF:
26566 case V4DF_FTYPE_V4DF_V4DI:
26567 case V4SF_FTYPE_V4SF_V4SF:
26568 case V4SF_FTYPE_V4SF_V4SI:
26569 case V4SF_FTYPE_V4SF_V2SI:
26570 case V4SF_FTYPE_V4SF_V2DF:
26571 case V4SF_FTYPE_V4SF_DI:
26572 case V4SF_FTYPE_V4SF_SI:
26573 case V2DI_FTYPE_V2DI_V2DI:
26574 case V2DI_FTYPE_V16QI_V16QI:
26575 case V2DI_FTYPE_V4SI_V4SI:
26576 case V2DI_FTYPE_V2DI_V16QI:
26577 case V2DI_FTYPE_V2DF_V2DF:
26578 case V2SI_FTYPE_V2SI_V2SI:
26579 case V2SI_FTYPE_V4HI_V4HI:
26580 case V2SI_FTYPE_V2SF_V2SF:
26581 case V2DF_FTYPE_V2DF_V2DF:
26582 case V2DF_FTYPE_V2DF_V4SF:
26583 case V2DF_FTYPE_V2DF_V2DI:
26584 case V2DF_FTYPE_V2DF_DI:
26585 case V2DF_FTYPE_V2DF_SI:
26586 case V2SF_FTYPE_V2SF_V2SF:
26587 case V1DI_FTYPE_V1DI_V1DI:
26588 case V1DI_FTYPE_V8QI_V8QI:
26589 case V1DI_FTYPE_V2SI_V2SI:
26590 if (comparison == UNKNOWN)
26591 return ix86_expand_binop_builtin (icode, exp, target);
26592 nargs = 2;
26593 break;
26594 case V4SF_FTYPE_V4SF_V4SF_SWAP:
26595 case V2DF_FTYPE_V2DF_V2DF_SWAP:
26596 gcc_assert (comparison != UNKNOWN);
26597 nargs = 2;
26598 swap = true;
26599 break;
26600 case V8HI_FTYPE_V8HI_V8HI_COUNT:
26601 case V8HI_FTYPE_V8HI_SI_COUNT:
26602 case V4SI_FTYPE_V4SI_V4SI_COUNT:
26603 case V4SI_FTYPE_V4SI_SI_COUNT:
26604 case V4HI_FTYPE_V4HI_V4HI_COUNT:
26605 case V4HI_FTYPE_V4HI_SI_COUNT:
26606 case V2DI_FTYPE_V2DI_V2DI_COUNT:
26607 case V2DI_FTYPE_V2DI_SI_COUNT:
26608 case V2SI_FTYPE_V2SI_V2SI_COUNT:
26609 case V2SI_FTYPE_V2SI_SI_COUNT:
26610 case V1DI_FTYPE_V1DI_V1DI_COUNT:
26611 case V1DI_FTYPE_V1DI_SI_COUNT:
26612 nargs = 2;
26613 last_arg_count = true;
26614 break;
26615 case UINT64_FTYPE_UINT64_UINT64:
26616 case UINT_FTYPE_UINT_UINT:
26617 case UINT_FTYPE_UINT_USHORT:
26618 case UINT_FTYPE_UINT_UCHAR:
26619 case UINT16_FTYPE_UINT16_INT:
26620 case UINT8_FTYPE_UINT8_INT:
26621 nargs = 2;
26622 break;
26623 case V2DI_FTYPE_V2DI_INT_CONVERT:
26624 nargs = 2;
26625 rmode = V1TImode;
26626 nargs_constant = 1;
26627 break;
26628 case V8HI_FTYPE_V8HI_INT:
26629 case V8HI_FTYPE_V8SF_INT:
26630 case V8HI_FTYPE_V4SF_INT:
26631 case V8SF_FTYPE_V8SF_INT:
26632 case V4SI_FTYPE_V4SI_INT:
26633 case V4SI_FTYPE_V8SI_INT:
26634 case V4HI_FTYPE_V4HI_INT:
26635 case V4DF_FTYPE_V4DF_INT:
26636 case V4SF_FTYPE_V4SF_INT:
26637 case V4SF_FTYPE_V8SF_INT:
26638 case V2DI_FTYPE_V2DI_INT:
26639 case V2DF_FTYPE_V2DF_INT:
26640 case V2DF_FTYPE_V4DF_INT:
26641 nargs = 2;
26642 nargs_constant = 1;
26643 break;
26644 case V16QI_FTYPE_V16QI_V16QI_V16QI:
26645 case V8SF_FTYPE_V8SF_V8SF_V8SF:
26646 case V4DF_FTYPE_V4DF_V4DF_V4DF:
26647 case V4SF_FTYPE_V4SF_V4SF_V4SF:
26648 case V2DF_FTYPE_V2DF_V2DF_V2DF:
26649 nargs = 3;
26650 break;
26651 case V16QI_FTYPE_V16QI_V16QI_INT:
26652 case V8HI_FTYPE_V8HI_V8HI_INT:
26653 case V8SI_FTYPE_V8SI_V8SI_INT:
26654 case V8SI_FTYPE_V8SI_V4SI_INT:
26655 case V8SF_FTYPE_V8SF_V8SF_INT:
26656 case V8SF_FTYPE_V8SF_V4SF_INT:
26657 case V4SI_FTYPE_V4SI_V4SI_INT:
26658 case V4DF_FTYPE_V4DF_V4DF_INT:
26659 case V4DF_FTYPE_V4DF_V2DF_INT:
26660 case V4SF_FTYPE_V4SF_V4SF_INT:
26661 case V2DI_FTYPE_V2DI_V2DI_INT:
26662 case V2DF_FTYPE_V2DF_V2DF_INT:
26663 nargs = 3;
26664 nargs_constant = 1;
26665 break;
26666 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
26667 nargs = 3;
26668 rmode = V2DImode;
26669 nargs_constant = 1;
26670 break;
26671 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
26672 nargs = 3;
26673 rmode = DImode;
26674 nargs_constant = 1;
26675 break;
26676 case V2DI_FTYPE_V2DI_UINT_UINT:
26677 nargs = 3;
26678 nargs_constant = 2;
26679 break;
26680 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
26681 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
26682 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
26683 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
26684 nargs = 4;
26685 nargs_constant = 1;
26686 break;
26687 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
26688 nargs = 4;
26689 nargs_constant = 2;
26690 break;
26691 default:
26692 gcc_unreachable ();
26695 gcc_assert (nargs <= ARRAY_SIZE (args));
26697 if (comparison != UNKNOWN)
26699 gcc_assert (nargs == 2);
26700 return ix86_expand_sse_compare (d, exp, target, swap);
26703 if (rmode == VOIDmode || rmode == tmode)
26705 if (optimize
26706 || target == 0
26707 || GET_MODE (target) != tmode
26708 || !insn_p->operand[0].predicate (target, tmode))
26709 target = gen_reg_rtx (tmode);
26710 real_target = target;
26712 else
26714 target = gen_reg_rtx (rmode);
26715 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
26718 for (i = 0; i < nargs; i++)
26720 tree arg = CALL_EXPR_ARG (exp, i);
26721 rtx op = expand_normal (arg);
26722 enum machine_mode mode = insn_p->operand[i + 1].mode;
26723 bool match = insn_p->operand[i + 1].predicate (op, mode);
26725 if (last_arg_count && (i + 1) == nargs)
26727 /* SIMD shift insns take either an 8-bit immediate or a register
26728 as count. But the builtin functions take an int as count. If the
26729 count does not match, we put it in a register (see the usage sketch after this function). */
26730 if (!match)
26732 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
26733 if (!insn_p->operand[i + 1].predicate (op, mode))
26734 op = copy_to_reg (op);
26737 else if ((nargs - i) <= nargs_constant)
26739 if (!match)
26740 switch (icode)
26742 case CODE_FOR_sse4_1_roundpd:
26743 case CODE_FOR_sse4_1_roundps:
26744 case CODE_FOR_sse4_1_roundsd:
26745 case CODE_FOR_sse4_1_roundss:
26746 case CODE_FOR_sse4_1_blendps:
26747 case CODE_FOR_avx_blendpd256:
26748 case CODE_FOR_avx_vpermilv4df:
26749 case CODE_FOR_avx_roundpd256:
26750 case CODE_FOR_avx_roundps256:
26751 error ("the last argument must be a 4-bit immediate");
26752 return const0_rtx;
26754 case CODE_FOR_sse4_1_blendpd:
26755 case CODE_FOR_avx_vpermilv2df:
26756 case CODE_FOR_xop_vpermil2v2df3:
26757 case CODE_FOR_xop_vpermil2v4sf3:
26758 case CODE_FOR_xop_vpermil2v4df3:
26759 case CODE_FOR_xop_vpermil2v8sf3:
26760 error ("the last argument must be a 2-bit immediate");
26761 return const0_rtx;
26763 case CODE_FOR_avx_vextractf128v4df:
26764 case CODE_FOR_avx_vextractf128v8sf:
26765 case CODE_FOR_avx_vextractf128v8si:
26766 case CODE_FOR_avx_vinsertf128v4df:
26767 case CODE_FOR_avx_vinsertf128v8sf:
26768 case CODE_FOR_avx_vinsertf128v8si:
26769 error ("the last argument must be a 1-bit immediate");
26770 return const0_rtx;
26772 case CODE_FOR_avx_cmpsdv2df3:
26773 case CODE_FOR_avx_cmpssv4sf3:
26774 case CODE_FOR_avx_cmppdv2df3:
26775 case CODE_FOR_avx_cmppsv4sf3:
26776 case CODE_FOR_avx_cmppdv4df3:
26777 case CODE_FOR_avx_cmppsv8sf3:
26778 error ("the last argument must be a 5-bit immediate");
26779 return const0_rtx;
26781 default:
26782 switch (nargs_constant)
26784 case 2:
26785 if ((nargs - i) == nargs_constant)
26787 error ("the next to last argument must be an 8-bit immediate");
26788 break;
26790 case 1:
26791 error ("the last argument must be an 8-bit immediate");
26792 break;
26793 default:
26794 gcc_unreachable ();
26796 return const0_rtx;
26799 else
26801 if (VECTOR_MODE_P (mode))
26802 op = safe_vector_operand (op, mode);
26804 /* If we aren't optimizing, only allow one memory operand to
26805 be generated. */
26806 if (memory_operand (op, mode))
26807 num_memory++;
26809 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
26811 if (optimize || !match || num_memory > 1)
26812 op = copy_to_mode_reg (mode, op);
26814 else
26816 op = copy_to_reg (op);
26817 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
26821 args[i].op = op;
26822 args[i].mode = mode;
26825 switch (nargs)
26827 case 1:
26828 pat = GEN_FCN (icode) (real_target, args[0].op);
26829 break;
26830 case 2:
26831 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
26832 break;
26833 case 3:
26834 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26835 args[2].op);
26836 break;
26837 case 4:
26838 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26839 args[2].op, args[3].op);
26840 break;
26841 default:
26842 gcc_unreachable ();
26845 if (! pat)
26846 return 0;
26848 emit_insn (pat);
26849 return target;
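/* Illustrative example for the *_COUNT handling above (a sketch, assuming
   the usual <emmintrin.h> wrappers): a constant shift count such as

     __m128i y = _mm_slli_epi32 (x, 3);

   satisfies the operand predicate and is emitted as an immediate-count
   shift, while a variable count fails the predicate and is first widened
   to SImode and copied into a register, as the comment in the loop above
   describes.  */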
26852 /* Subroutine of ix86_expand_builtin to take care of special insns
26853 with variable number of operands. */
26855 static rtx
26856 ix86_expand_special_args_builtin (const struct builtin_description *d,
26857 tree exp, rtx target)
26859 tree arg;
26860 rtx pat, op;
26861 unsigned int i, nargs, arg_adjust, memory;
26862 struct
26864 rtx op;
26865 enum machine_mode mode;
26866 } args[3];
26867 enum insn_code icode = d->icode;
26868 bool last_arg_constant = false;
26869 const struct insn_data_d *insn_p = &insn_data[icode];
26870 enum machine_mode tmode = insn_p->operand[0].mode;
26871 enum { load, store } klass;
26873 switch ((enum ix86_builtin_func_type) d->flag)
26875 case VOID_FTYPE_VOID:
26876 if (icode == CODE_FOR_avx_vzeroupper)
26877 target = GEN_INT (vzeroupper_intrinsic);
26878 emit_insn (GEN_FCN (icode) (target));
26879 return 0;
26880 case VOID_FTYPE_UINT64:
26881 case VOID_FTYPE_UNSIGNED:
26882 nargs = 0;
26883 klass = store;
26884 memory = 0;
26885 break;
26887 case UINT64_FTYPE_VOID:
26888 case UNSIGNED_FTYPE_VOID:
26889 nargs = 0;
26890 klass = load;
26891 memory = 0;
26892 break;
26893 case UINT64_FTYPE_PUNSIGNED:
26894 case V2DI_FTYPE_PV2DI:
26895 case V32QI_FTYPE_PCCHAR:
26896 case V16QI_FTYPE_PCCHAR:
26897 case V8SF_FTYPE_PCV4SF:
26898 case V8SF_FTYPE_PCFLOAT:
26899 case V4SF_FTYPE_PCFLOAT:
26900 case V4DF_FTYPE_PCV2DF:
26901 case V4DF_FTYPE_PCDOUBLE:
26902 case V2DF_FTYPE_PCDOUBLE:
26903 case VOID_FTYPE_PVOID:
26904 nargs = 1;
26905 klass = load;
26906 memory = 0;
26907 break;
26908 case VOID_FTYPE_PV2SF_V4SF:
26909 case VOID_FTYPE_PV4DI_V4DI:
26910 case VOID_FTYPE_PV2DI_V2DI:
26911 case VOID_FTYPE_PCHAR_V32QI:
26912 case VOID_FTYPE_PCHAR_V16QI:
26913 case VOID_FTYPE_PFLOAT_V8SF:
26914 case VOID_FTYPE_PFLOAT_V4SF:
26915 case VOID_FTYPE_PDOUBLE_V4DF:
26916 case VOID_FTYPE_PDOUBLE_V2DF:
26917 case VOID_FTYPE_PULONGLONG_ULONGLONG:
26918 case VOID_FTYPE_PINT_INT:
26919 nargs = 1;
26920 klass = store;
26921 /* Reserve memory operand for target. */
26922 memory = ARRAY_SIZE (args);
26923 break;
26924 case V4SF_FTYPE_V4SF_PCV2SF:
26925 case V2DF_FTYPE_V2DF_PCDOUBLE:
26926 nargs = 2;
26927 klass = load;
26928 memory = 1;
26929 break;
26930 case V8SF_FTYPE_PCV8SF_V8SI:
26931 case V4DF_FTYPE_PCV4DF_V4DI:
26932 case V4SF_FTYPE_PCV4SF_V4SI:
26933 case V2DF_FTYPE_PCV2DF_V2DI:
26934 nargs = 2;
26935 klass = load;
26936 memory = 0;
26937 break;
26938 case VOID_FTYPE_PV8SF_V8SI_V8SF:
26939 case VOID_FTYPE_PV4DF_V4DI_V4DF:
26940 case VOID_FTYPE_PV4SF_V4SI_V4SF:
26941 case VOID_FTYPE_PV2DF_V2DI_V2DF:
26942 nargs = 2;
26943 klass = store;
26944 /* Reserve memory operand for target. */
26945 memory = ARRAY_SIZE (args);
26946 break;
26947 case VOID_FTYPE_UINT_UINT_UINT:
26948 case VOID_FTYPE_UINT64_UINT_UINT:
26949 case UCHAR_FTYPE_UINT_UINT_UINT:
26950 case UCHAR_FTYPE_UINT64_UINT_UINT:
26951 nargs = 3;
26952 klass = load;
26953 memory = ARRAY_SIZE (args);
26954 last_arg_constant = true;
26955 break;
26956 default:
26957 gcc_unreachable ();
26960 gcc_assert (nargs <= ARRAY_SIZE (args));
26962 if (klass == store)
26964 arg = CALL_EXPR_ARG (exp, 0);
26965 op = expand_normal (arg);
26966 gcc_assert (target == 0);
26967 if (memory)
26968 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
26969 else
26970 target = force_reg (tmode, op);
26971 arg_adjust = 1;
26973 else
26975 arg_adjust = 0;
26976 if (optimize
26977 || target == 0
26978 || GET_MODE (target) != tmode
26979 || !insn_p->operand[0].predicate (target, tmode))
26980 target = gen_reg_rtx (tmode);
26983 for (i = 0; i < nargs; i++)
26985 enum machine_mode mode = insn_p->operand[i + 1].mode;
26986 bool match;
26988 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
26989 op = expand_normal (arg);
26990 match = insn_p->operand[i + 1].predicate (op, mode);
26992 if (last_arg_constant && (i + 1) == nargs)
26994 if (!match)
26996 if (icode == CODE_FOR_lwp_lwpvalsi3
26997 || icode == CODE_FOR_lwp_lwpinssi3
26998 || icode == CODE_FOR_lwp_lwpvaldi3
26999 || icode == CODE_FOR_lwp_lwpinsdi3)
27000 error ("the last argument must be a 32-bit immediate");
27001 else
27002 error ("the last argument must be an 8-bit immediate");
27003 return const0_rtx;
27006 else
27008 if (i == memory)
27010 /* This must be the memory operand. */
27011 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
27012 gcc_assert (GET_MODE (op) == mode
27013 || GET_MODE (op) == VOIDmode);
27015 else
27017 /* This must be a register. */
27018 if (VECTOR_MODE_P (mode))
27019 op = safe_vector_operand (op, mode);
27021 gcc_assert (GET_MODE (op) == mode
27022 || GET_MODE (op) == VOIDmode);
27023 op = copy_to_mode_reg (mode, op);
27027 args[i].op = op;
27028 args[i].mode = mode;
27031 switch (nargs)
27033 case 0:
27034 pat = GEN_FCN (icode) (target);
27035 break;
27036 case 1:
27037 pat = GEN_FCN (icode) (target, args[0].op);
27038 break;
27039 case 2:
27040 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
27041 break;
27042 case 3:
27043 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
27044 break;
27045 default:
27046 gcc_unreachable ();
27049 if (! pat)
27050 return 0;
27051 emit_insn (pat);
27052 return klass == store ? 0 : target;
27055 /* Return the integer constant in ARG. Constrain it to be in the range
27056 of the subparts of VEC_TYPE; issue an error if not. */
27058 static int
27059 get_element_number (tree vec_type, tree arg)
27061 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
27063 if (!host_integerp (arg, 1)
27064 || (elt = tree_low_cst (arg, 1), elt > max))
27066 error ("selector must be an integer constant in the range 0..%wi", max);
27067 return 0;
27070 return elt;
27073 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27074 ix86_expand_vector_init. We DO have language-level syntax for this, in
27075 the form of (type){ init-list }. Except that since we can't place emms
27076 instructions from inside the compiler, we can't allow the use of MMX
27077 registers unless the user explicitly asks for it. So we do *not* define
27078 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
27079 we have builtins invoked by mmintrin.h that give us license to emit
27080 these sorts of instructions. */
27082 static rtx
27083 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
27085 enum machine_mode tmode = TYPE_MODE (type);
27086 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
27087 int i, n_elt = GET_MODE_NUNITS (tmode);
27088 rtvec v = rtvec_alloc (n_elt);
27090 gcc_assert (VECTOR_MODE_P (tmode));
27091 gcc_assert (call_expr_nargs (exp) == n_elt);
27093 for (i = 0; i < n_elt; ++i)
27095 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
27096 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
27099 if (!target || !register_operand (target, tmode))
27100 target = gen_reg_rtx (tmode);
27102 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
27103 return target;
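/* Illustrative example (a sketch; the wrapper spelling is the assumed
   <mmintrin.h> one): MMX vector construction goes through these builtins
   rather than vec_init patterns, e.g.

     __m64 v = _mm_set_pi32 (2, 1);

   which is typically defined in terms of __builtin_ia32_vec_init_v2si
   and therefore ends up in ix86_expand_vec_init_builtin above.  */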
27106 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27107 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
27108 had a language-level syntax for referencing vector elements. */
27110 static rtx
27111 ix86_expand_vec_ext_builtin (tree exp, rtx target)
27113 enum machine_mode tmode, mode0;
27114 tree arg0, arg1;
27115 int elt;
27116 rtx op0;
27118 arg0 = CALL_EXPR_ARG (exp, 0);
27119 arg1 = CALL_EXPR_ARG (exp, 1);
27121 op0 = expand_normal (arg0);
27122 elt = get_element_number (TREE_TYPE (arg0), arg1);
27124 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27125 mode0 = TYPE_MODE (TREE_TYPE (arg0));
27126 gcc_assert (VECTOR_MODE_P (mode0));
27128 op0 = force_reg (mode0, op0);
27130 if (optimize || !target || !register_operand (target, tmode))
27131 target = gen_reg_rtx (tmode);
27133 ix86_expand_vector_extract (true, target, op0, elt);
27135 return target;
27138 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27139 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
27140 a language-level syntax for referencing vector elements. */
27142 static rtx
27143 ix86_expand_vec_set_builtin (tree exp)
27145 enum machine_mode tmode, mode1;
27146 tree arg0, arg1, arg2;
27147 int elt;
27148 rtx op0, op1, target;
27150 arg0 = CALL_EXPR_ARG (exp, 0);
27151 arg1 = CALL_EXPR_ARG (exp, 1);
27152 arg2 = CALL_EXPR_ARG (exp, 2);
27154 tmode = TYPE_MODE (TREE_TYPE (arg0));
27155 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27156 gcc_assert (VECTOR_MODE_P (tmode));
27158 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
27159 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
27160 elt = get_element_number (TREE_TYPE (arg0), arg2);
27162 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
27163 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
27165 op0 = force_reg (tmode, op0);
27166 op1 = force_reg (mode1, op1);
27168 /* OP0 is the source of these builtin functions and shouldn't be
27169 modified. Create a copy, use it, and return it as the target. */
27170 target = gen_reg_rtx (tmode);
27171 emit_move_insn (target, op0);
27172 ix86_expand_vector_set (true, target, op1, elt);
27174 return target;
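/* Illustrative example (a sketch, assuming the usual <emmintrin.h>
   wrapper): element insertion copies the source vector first, so the
   original operand is left untouched, e.g.

     __m128i w = _mm_insert_epi16 (v, 42, 3);

   returns a fresh vector with element 3 replaced while V itself is not
   modified, which is exactly the copy-then-set sequence above.  */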
27177 /* Expand an expression EXP that calls a built-in function,
27178 with result going to TARGET if that's convenient
27179 (and in mode MODE if that's convenient).
27180 SUBTARGET may be used as the target for computing one of EXP's operands.
27181 IGNORE is nonzero if the value is to be ignored. */
27183 static rtx
27184 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
27185 enum machine_mode mode ATTRIBUTE_UNUSED,
27186 int ignore ATTRIBUTE_UNUSED)
27188 const struct builtin_description *d;
27189 size_t i;
27190 enum insn_code icode;
27191 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
27192 tree arg0, arg1, arg2;
27193 rtx op0, op1, op2, pat;
27194 enum machine_mode mode0, mode1, mode2;
27195 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
27197 /* Determine whether the builtin function is available under the current ISA.
27198 Originally the builtin was not created if it wasn't applicable to the
27199 current ISA based on the command line switches. With function specific
27200 options, we need to check in the context of the function making the call
27201 whether it is supported. */
27202 if (ix86_builtins_isa[fcode].isa
27203 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
27205 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
27206 NULL, NULL, false);
27208 if (!opts)
27209 error ("%qE needs unknown isa option", fndecl);
27210 else
27212 gcc_assert (opts != NULL);
27213 error ("%qE needs isa option %s", fndecl, opts);
27214 free (opts);
27216 return const0_rtx;
27219 switch (fcode)
27221 case IX86_BUILTIN_MASKMOVQ:
27222 case IX86_BUILTIN_MASKMOVDQU:
27223 icode = (fcode == IX86_BUILTIN_MASKMOVQ
27224 ? CODE_FOR_mmx_maskmovq
27225 : CODE_FOR_sse2_maskmovdqu);
27226 /* Note the arg order is different from the operand order. */
27227 arg1 = CALL_EXPR_ARG (exp, 0);
27228 arg2 = CALL_EXPR_ARG (exp, 1);
27229 arg0 = CALL_EXPR_ARG (exp, 2);
27230 op0 = expand_normal (arg0);
27231 op1 = expand_normal (arg1);
27232 op2 = expand_normal (arg2);
27233 mode0 = insn_data[icode].operand[0].mode;
27234 mode1 = insn_data[icode].operand[1].mode;
27235 mode2 = insn_data[icode].operand[2].mode;
27237 op0 = force_reg (Pmode, op0);
27238 op0 = gen_rtx_MEM (mode1, op0);
27240 if (!insn_data[icode].operand[0].predicate (op0, mode0))
27241 op0 = copy_to_mode_reg (mode0, op0);
27242 if (!insn_data[icode].operand[1].predicate (op1, mode1))
27243 op1 = copy_to_mode_reg (mode1, op1);
27244 if (!insn_data[icode].operand[2].predicate (op2, mode2))
27245 op2 = copy_to_mode_reg (mode2, op2);
27246 pat = GEN_FCN (icode) (op0, op1, op2);
27247 if (! pat)
27248 return 0;
27249 emit_insn (pat);
27250 return 0;
27252 case IX86_BUILTIN_LDMXCSR:
27253 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
27254 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27255 emit_move_insn (target, op0);
27256 emit_insn (gen_sse_ldmxcsr (target));
27257 return 0;
27259 case IX86_BUILTIN_STMXCSR:
27260 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27261 emit_insn (gen_sse_stmxcsr (target));
27262 return copy_to_mode_reg (SImode, target);
27264 case IX86_BUILTIN_CLFLUSH:
27265 arg0 = CALL_EXPR_ARG (exp, 0);
27266 op0 = expand_normal (arg0);
27267 icode = CODE_FOR_sse2_clflush;
27268 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27269 op0 = copy_to_mode_reg (Pmode, op0);
27271 emit_insn (gen_sse2_clflush (op0));
27272 return 0;
27274 case IX86_BUILTIN_MONITOR:
27275 arg0 = CALL_EXPR_ARG (exp, 0);
27276 arg1 = CALL_EXPR_ARG (exp, 1);
27277 arg2 = CALL_EXPR_ARG (exp, 2);
27278 op0 = expand_normal (arg0);
27279 op1 = expand_normal (arg1);
27280 op2 = expand_normal (arg2);
27281 if (!REG_P (op0))
27282 op0 = copy_to_mode_reg (Pmode, op0);
27283 if (!REG_P (op1))
27284 op1 = copy_to_mode_reg (SImode, op1);
27285 if (!REG_P (op2))
27286 op2 = copy_to_mode_reg (SImode, op2);
27287 emit_insn (ix86_gen_monitor (op0, op1, op2));
27288 return 0;
27290 case IX86_BUILTIN_MWAIT:
27291 arg0 = CALL_EXPR_ARG (exp, 0);
27292 arg1 = CALL_EXPR_ARG (exp, 1);
27293 op0 = expand_normal (arg0);
27294 op1 = expand_normal (arg1);
27295 if (!REG_P (op0))
27296 op0 = copy_to_mode_reg (SImode, op0);
27297 if (!REG_P (op1))
27298 op1 = copy_to_mode_reg (SImode, op1);
27299 emit_insn (gen_sse3_mwait (op0, op1));
27300 return 0;
27302 case IX86_BUILTIN_VEC_INIT_V2SI:
27303 case IX86_BUILTIN_VEC_INIT_V4HI:
27304 case IX86_BUILTIN_VEC_INIT_V8QI:
27305 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
27307 case IX86_BUILTIN_VEC_EXT_V2DF:
27308 case IX86_BUILTIN_VEC_EXT_V2DI:
27309 case IX86_BUILTIN_VEC_EXT_V4SF:
27310 case IX86_BUILTIN_VEC_EXT_V4SI:
27311 case IX86_BUILTIN_VEC_EXT_V8HI:
27312 case IX86_BUILTIN_VEC_EXT_V2SI:
27313 case IX86_BUILTIN_VEC_EXT_V4HI:
27314 case IX86_BUILTIN_VEC_EXT_V16QI:
27315 return ix86_expand_vec_ext_builtin (exp, target);
27317 case IX86_BUILTIN_VEC_SET_V2DI:
27318 case IX86_BUILTIN_VEC_SET_V4SF:
27319 case IX86_BUILTIN_VEC_SET_V4SI:
27320 case IX86_BUILTIN_VEC_SET_V8HI:
27321 case IX86_BUILTIN_VEC_SET_V4HI:
27322 case IX86_BUILTIN_VEC_SET_V16QI:
27323 return ix86_expand_vec_set_builtin (exp);
27325 case IX86_BUILTIN_VEC_PERM_V2DF:
27326 case IX86_BUILTIN_VEC_PERM_V4SF:
27327 case IX86_BUILTIN_VEC_PERM_V2DI:
27328 case IX86_BUILTIN_VEC_PERM_V4SI:
27329 case IX86_BUILTIN_VEC_PERM_V8HI:
27330 case IX86_BUILTIN_VEC_PERM_V16QI:
27331 case IX86_BUILTIN_VEC_PERM_V2DI_U:
27332 case IX86_BUILTIN_VEC_PERM_V4SI_U:
27333 case IX86_BUILTIN_VEC_PERM_V8HI_U:
27334 case IX86_BUILTIN_VEC_PERM_V16QI_U:
27335 case IX86_BUILTIN_VEC_PERM_V4DF:
27336 case IX86_BUILTIN_VEC_PERM_V8SF:
27337 return ix86_expand_vec_perm_builtin (exp);
27339 case IX86_BUILTIN_INFQ:
27340 case IX86_BUILTIN_HUGE_VALQ:
27342 REAL_VALUE_TYPE inf;
27343 rtx tmp;
27345 real_inf (&inf);
27346 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
27348 tmp = validize_mem (force_const_mem (mode, tmp));
27350 if (target == 0)
27351 target = gen_reg_rtx (mode);
27353 emit_move_insn (target, tmp);
27354 return target;
27357 case IX86_BUILTIN_LLWPCB:
27358 arg0 = CALL_EXPR_ARG (exp, 0);
27359 op0 = expand_normal (arg0);
27360 icode = CODE_FOR_lwp_llwpcb;
27361 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27362 op0 = copy_to_mode_reg (Pmode, op0);
27363 emit_insn (gen_lwp_llwpcb (op0));
27364 return 0;
27366 case IX86_BUILTIN_SLWPCB:
27367 icode = CODE_FOR_lwp_slwpcb;
27368 if (!target
27369 || !insn_data[icode].operand[0].predicate (target, Pmode))
27370 target = gen_reg_rtx (Pmode);
27371 emit_insn (gen_lwp_slwpcb (target));
27372 return target;
27374 case IX86_BUILTIN_BEXTRI32:
27375 case IX86_BUILTIN_BEXTRI64:
27376 arg0 = CALL_EXPR_ARG (exp, 0);
27377 arg1 = CALL_EXPR_ARG (exp, 1);
27378 op0 = expand_normal (arg0);
27379 op1 = expand_normal (arg1);
27380 icode = (fcode == IX86_BUILTIN_BEXTRI32
27381 ? CODE_FOR_tbm_bextri_si
27382 : CODE_FOR_tbm_bextri_di);
27383 if (!CONST_INT_P (op1))
27385 error ("last argument must be an immediate");
27386 return const0_rtx;
27388 else
27390 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
27391 unsigned char lsb_index = INTVAL (op1) & 0xFF;
27392 op1 = GEN_INT (length);
27393 op2 = GEN_INT (lsb_index);
27394 pat = GEN_FCN (icode) (target, op0, op1, op2);
27395 if (pat)
27396 emit_insn (pat);
27397 return target;
27400 case IX86_BUILTIN_RDRAND16_STEP:
27401 icode = CODE_FOR_rdrandhi_1;
27402 mode0 = HImode;
27403 goto rdrand_step;
27405 case IX86_BUILTIN_RDRAND32_STEP:
27406 icode = CODE_FOR_rdrandsi_1;
27407 mode0 = SImode;
27408 goto rdrand_step;
27410 case IX86_BUILTIN_RDRAND64_STEP:
27411 icode = CODE_FOR_rdranddi_1;
27412 mode0 = DImode;
27414 rdrand_step:
27415 op0 = gen_reg_rtx (mode0);
27416 emit_insn (GEN_FCN (icode) (op0));
27418 op1 = gen_reg_rtx (SImode);
27419 emit_move_insn (op1, CONST1_RTX (SImode));
27421 /* Emit SImode conditional move so TARGET reflects whether rdrand succeeded (see the usage sketch after this function). */
27422 if (mode0 == HImode)
27424 op2 = gen_reg_rtx (SImode);
27425 emit_insn (gen_zero_extendhisi2 (op2, op0));
27427 else if (mode0 == SImode)
27428 op2 = op0;
27429 else
27430 op2 = gen_rtx_SUBREG (SImode, op0, 0);
27432 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
27433 const0_rtx);
27434 emit_insn (gen_rtx_SET (VOIDmode, op1,
27435 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
27436 emit_move_insn (target, op1);
27438 arg0 = CALL_EXPR_ARG (exp, 0);
27439 op1 = expand_normal (arg0);
27440 if (!address_operand (op1, VOIDmode))
27441 op1 = copy_addr_to_reg (op1);
27442 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
27443 return target;
27445 default:
27446 break;
27449 for (i = 0, d = bdesc_special_args;
27450 i < ARRAY_SIZE (bdesc_special_args);
27451 i++, d++)
27452 if (d->code == fcode)
27453 return ix86_expand_special_args_builtin (d, exp, target);
27455 for (i = 0, d = bdesc_args;
27456 i < ARRAY_SIZE (bdesc_args);
27457 i++, d++)
27458 if (d->code == fcode)
27459 switch (fcode)
27461 case IX86_BUILTIN_FABSQ:
27462 case IX86_BUILTIN_COPYSIGNQ:
27463 if (!TARGET_SSE2)
27464 /* Emit a normal call if SSE2 isn't available. */
27465 return expand_call (exp, target, ignore);
27466 default:
27467 return ix86_expand_args_builtin (d, exp, target);
27470 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27471 if (d->code == fcode)
27472 return ix86_expand_sse_comi (d, exp, target);
27474 for (i = 0, d = bdesc_pcmpestr;
27475 i < ARRAY_SIZE (bdesc_pcmpestr);
27476 i++, d++)
27477 if (d->code == fcode)
27478 return ix86_expand_sse_pcmpestr (d, exp, target);
27480 for (i = 0, d = bdesc_pcmpistr;
27481 i < ARRAY_SIZE (bdesc_pcmpistr);
27482 i++, d++)
27483 if (d->code == fcode)
27484 return ix86_expand_sse_pcmpistr (d, exp, target);
27486 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27487 if (d->code == fcode)
27488 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
27489 (enum ix86_builtin_func_type)
27490 d->flag, d->comparison);
27492 gcc_unreachable ();
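/* Illustrative usage sketch for the rdrand_step expansion above (the
   wrapper spelling is the assumed <immintrin.h> one):

     unsigned int val;
     if (__builtin_ia32_rdrand32_step (&val))
       use (val);

   The builtin stores the hardware random value through its pointer
   argument and yields a nonzero result only when rdrand reported success
   via the carry flag.  */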
27495 /* Returns a function decl for a vectorized version of the builtin function
27496 with builtin function code FN and the result vector type TYPE, or NULL_TREE
27497 if it is not available. */
27499 static tree
27500 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
27501 tree type_in)
27503 enum machine_mode in_mode, out_mode;
27504 int in_n, out_n;
27505 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27507 if (TREE_CODE (type_out) != VECTOR_TYPE
27508 || TREE_CODE (type_in) != VECTOR_TYPE
27509 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
27510 return NULL_TREE;
27512 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27513 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27514 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27515 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27517 switch (fn)
27519 case BUILT_IN_SQRT:
27520 if (out_mode == DFmode && in_mode == DFmode)
27522 if (out_n == 2 && in_n == 2)
27523 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27524 else if (out_n == 4 && in_n == 4)
27525 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
27527 break;
27529 case BUILT_IN_SQRTF:
27530 if (out_mode == SFmode && in_mode == SFmode)
27532 if (out_n == 4 && in_n == 4)
27533 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
27534 else if (out_n == 8 && in_n == 8)
27535 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
27537 break;
27539 case BUILT_IN_LRINT:
27540 if (out_mode == SImode && out_n == 4
27541 && in_mode == DFmode && in_n == 2)
27542 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
27543 break;
27545 case BUILT_IN_LRINTF:
27546 if (out_mode == SImode && in_mode == SFmode)
27548 if (out_n == 4 && in_n == 4)
27549 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
27550 else if (out_n == 8 && in_n == 8)
27551 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
27553 break;
27555 case BUILT_IN_COPYSIGN:
27556 if (out_mode == DFmode && in_mode == DFmode)
27558 if (out_n == 2 && in_n == 2)
27559 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
27560 else if (out_n == 4 && in_n == 4)
27561 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
27563 break;
27565 case BUILT_IN_COPYSIGNF:
27566 if (out_mode == SFmode && in_mode == SFmode)
27568 if (out_n == 4 && in_n == 4)
27569 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
27570 else if (out_n == 8 && in_n == 8)
27571 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
27573 break;
27575 case BUILT_IN_FMA:
27576 if (out_mode == DFmode && in_mode == DFmode)
27578 if (out_n == 2 && in_n == 2)
27579 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
27580 if (out_n == 4 && in_n == 4)
27581 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
27583 break;
27585 case BUILT_IN_FMAF:
27586 if (out_mode == SFmode && in_mode == SFmode)
27588 if (out_n == 4 && in_n == 4)
27589 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
27590 if (out_n == 8 && in_n == 8)
27591 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
27593 break;
27595 default:
27596 break;
27599 /* Dispatch to a handler for a vectorization library. */
27600 if (ix86_veclib_handler)
27601 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
27602 type_in);
27604 return NULL_TREE;
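/* Illustrative example (a sketch): with e.g. -O3 -ffast-math the
   vectorizer asks this hook for a vector form of libm calls, so a loop
   such as

     for (i = 0; i < n; i++)
       out[i] = sqrt (in[i]);

   over double arrays can be mapped to IX86_BUILTIN_SQRTPD (two lanes)
   or, with AVX, IX86_BUILTIN_SQRTPD256 (four lanes), per the
   BUILT_IN_SQRT case above.  */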
27607 /* Handler for an SVML-style interface to
27608 a library with vectorized intrinsics. */
27610 static tree
27611 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
27613 char name[20];
27614 tree fntype, new_fndecl, args;
27615 unsigned arity;
27616 const char *bname;
27617 enum machine_mode el_mode, in_mode;
27618 int n, in_n;
27620 /* The SVML is suitable for unsafe math only. */
27621 if (!flag_unsafe_math_optimizations)
27622 return NULL_TREE;
27624 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27625 n = TYPE_VECTOR_SUBPARTS (type_out);
27626 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27627 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27628 if (el_mode != in_mode
27629 || n != in_n)
27630 return NULL_TREE;
27632 switch (fn)
27634 case BUILT_IN_EXP:
27635 case BUILT_IN_LOG:
27636 case BUILT_IN_LOG10:
27637 case BUILT_IN_POW:
27638 case BUILT_IN_TANH:
27639 case BUILT_IN_TAN:
27640 case BUILT_IN_ATAN:
27641 case BUILT_IN_ATAN2:
27642 case BUILT_IN_ATANH:
27643 case BUILT_IN_CBRT:
27644 case BUILT_IN_SINH:
27645 case BUILT_IN_SIN:
27646 case BUILT_IN_ASINH:
27647 case BUILT_IN_ASIN:
27648 case BUILT_IN_COSH:
27649 case BUILT_IN_COS:
27650 case BUILT_IN_ACOSH:
27651 case BUILT_IN_ACOS:
27652 if (el_mode != DFmode || n != 2)
27653 return NULL_TREE;
27654 break;
27656 case BUILT_IN_EXPF:
27657 case BUILT_IN_LOGF:
27658 case BUILT_IN_LOG10F:
27659 case BUILT_IN_POWF:
27660 case BUILT_IN_TANHF:
27661 case BUILT_IN_TANF:
27662 case BUILT_IN_ATANF:
27663 case BUILT_IN_ATAN2F:
27664 case BUILT_IN_ATANHF:
27665 case BUILT_IN_CBRTF:
27666 case BUILT_IN_SINHF:
27667 case BUILT_IN_SINF:
27668 case BUILT_IN_ASINHF:
27669 case BUILT_IN_ASINF:
27670 case BUILT_IN_COSHF:
27671 case BUILT_IN_COSF:
27672 case BUILT_IN_ACOSHF:
27673 case BUILT_IN_ACOSF:
27674 if (el_mode != SFmode || n != 4)
27675 return NULL_TREE;
27676 break;
27678 default:
27679 return NULL_TREE;
27682 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27684 if (fn == BUILT_IN_LOGF)
27685 strcpy (name, "vmlsLn4");
27686 else if (fn == BUILT_IN_LOG)
27687 strcpy (name, "vmldLn2");
27688 else if (n == 4)
27690 sprintf (name, "vmls%s", bname+10);
27691 name[strlen (name)-1] = '4';
27693 else
27694 sprintf (name, "vmld%s2", bname+10);
27696 /* Convert to uppercase. */
27697 name[4] &= ~0x20;
27699 arity = 0;
27700 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27701 args = TREE_CHAIN (args))
27702 arity++;
27704 if (arity == 1)
27705 fntype = build_function_type_list (type_out, type_in, NULL);
27706 else
27707 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27709 /* Build a function declaration for the vectorized function. */
27710 new_fndecl = build_decl (BUILTINS_LOCATION,
27711 FUNCTION_DECL, get_identifier (name), fntype);
27712 TREE_PUBLIC (new_fndecl) = 1;
27713 DECL_EXTERNAL (new_fndecl) = 1;
27714 DECL_IS_NOVOPS (new_fndecl) = 1;
27715 TREE_READONLY (new_fndecl) = 1;
27717 return new_fndecl;
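/* Worked example of the name mangling above (a sketch): for
   BUILT_IN_SINF, bname is "__builtin_sinf", so bname+10 is "sinf"; the
   4-lane branch produces "vmlssinf", the trailing character is replaced
   by '4' giving "vmlssin4", and clearing bit 0x20 of name[4] capitalizes
   it to "vmlsSin4".  The double-precision BUILT_IN_SIN case yields
   "vmldSin2" the same way.  */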
27720 /* Handler for an ACML-style interface to
27721 a library with vectorized intrinsics. */
27723 static tree
27724 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
27726 char name[20] = "__vr.._";
27727 tree fntype, new_fndecl, args;
27728 unsigned arity;
27729 const char *bname;
27730 enum machine_mode el_mode, in_mode;
27731 int n, in_n;
27733 /* The ACML is 64-bit only and suitable for unsafe math only, as
27734 it does not correctly support parts of IEEE (such as denormals)
27735 with the required precision. */
27736 if (!TARGET_64BIT
27737 || !flag_unsafe_math_optimizations)
27738 return NULL_TREE;
27740 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27741 n = TYPE_VECTOR_SUBPARTS (type_out);
27742 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27743 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27744 if (el_mode != in_mode
27745 || n != in_n)
27746 return NULL_TREE;
27748 switch (fn)
27750 case BUILT_IN_SIN:
27751 case BUILT_IN_COS:
27752 case BUILT_IN_EXP:
27753 case BUILT_IN_LOG:
27754 case BUILT_IN_LOG2:
27755 case BUILT_IN_LOG10:
27756 name[4] = 'd';
27757 name[5] = '2';
27758 if (el_mode != DFmode
27759 || n != 2)
27760 return NULL_TREE;
27761 break;
27763 case BUILT_IN_SINF:
27764 case BUILT_IN_COSF:
27765 case BUILT_IN_EXPF:
27766 case BUILT_IN_POWF:
27767 case BUILT_IN_LOGF:
27768 case BUILT_IN_LOG2F:
27769 case BUILT_IN_LOG10F:
27770 name[4] = 's';
27771 name[5] = '4';
27772 if (el_mode != SFmode
27773 || n != 4)
27774 return NULL_TREE;
27775 break;
27777 default:
27778 return NULL_TREE;
27781 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27782 sprintf (name + 7, "%s", bname+10);
27784 arity = 0;
27785 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27786 args = TREE_CHAIN (args))
27787 arity++;
27789 if (arity == 1)
27790 fntype = build_function_type_list (type_out, type_in, NULL);
27791 else
27792 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27794 /* Build a function declaration for the vectorized function. */
27795 new_fndecl = build_decl (BUILTINS_LOCATION,
27796 FUNCTION_DECL, get_identifier (name), fntype);
27797 TREE_PUBLIC (new_fndecl) = 1;
27798 DECL_EXTERNAL (new_fndecl) = 1;
27799 DECL_IS_NOVOPS (new_fndecl) = 1;
27800 TREE_READONLY (new_fndecl) = 1;
27802 return new_fndecl;
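/* Worked example of the name construction above (a sketch): the template
   is "__vr.._"; for BUILT_IN_SIN the two dots become "d2" and the builtin
   suffix "sin" is appended, giving "__vrd2_sin", while BUILT_IN_SINF
   becomes "__vrs4_sinf".  */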
27806 /* Returns a decl of a function that implements conversion of an integer vector
27807 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
27808 are the types involved when converting according to CODE.
27809 Return NULL_TREE if it is not available. */
27811 static tree
27812 ix86_vectorize_builtin_conversion (unsigned int code,
27813 tree dest_type, tree src_type)
27815 if (! TARGET_SSE2)
27816 return NULL_TREE;
27818 switch (code)
27820 case FLOAT_EXPR:
27821 switch (TYPE_MODE (src_type))
27823 case V4SImode:
27824 switch (TYPE_MODE (dest_type))
27826 case V4SFmode:
27827 return (TYPE_UNSIGNED (src_type)
27828 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
27829 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
27830 case V4DFmode:
27831 return (TYPE_UNSIGNED (src_type)
27832 ? NULL_TREE
27833 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
27834 default:
27835 return NULL_TREE;
27837 break;
27838 case V8SImode:
27839 switch (TYPE_MODE (dest_type))
27841 case V8SFmode:
27842 return (TYPE_UNSIGNED (src_type)
27843 ? NULL_TREE
27844 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
27845 default:
27846 return NULL_TREE;
27848 break;
27849 default:
27850 return NULL_TREE;
27853 case FIX_TRUNC_EXPR:
27854 switch (TYPE_MODE (dest_type))
27856 case V4SImode:
27857 switch (TYPE_MODE (src_type))
27859 case V4SFmode:
27860 return (TYPE_UNSIGNED (dest_type)
27861 ? NULL_TREE
27862 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
27863 case V4DFmode:
27864 return (TYPE_UNSIGNED (dest_type)
27865 ? NULL_TREE
27866 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
27867 default:
27868 return NULL_TREE;
27870 break;
27872 case V8SImode:
27873 switch (TYPE_MODE (src_type))
27875 case V8SFmode:
27876 return (TYPE_UNSIGNED (dest_type)
27877 ? NULL_TREE
27878 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
27879 default:
27880 return NULL_TREE;
27882 break;
27884 default:
27885 return NULL_TREE;
27888 default:
27889 return NULL_TREE;
27892 return NULL_TREE;
27895 /* Returns a decl of a target-specific builtin that implements the
27896 reciprocal of the function FN, or NULL_TREE if not available. */
27898 static tree
27899 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
27900 bool sqrt ATTRIBUTE_UNUSED)
27902 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
27903 && flag_finite_math_only && !flag_trapping_math
27904 && flag_unsafe_math_optimizations))
27905 return NULL_TREE;
27907 if (md_fn)
27908 /* Machine dependent builtins. */
27909 switch (fn)
27911 /* Vectorized version of sqrt to rsqrt conversion. */
27912 case IX86_BUILTIN_SQRTPS_NR:
27913 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
27915 case IX86_BUILTIN_SQRTPS_NR256:
27916 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
27918 default:
27919 return NULL_TREE;
27921 else
27922 /* Normal builtins. */
27923 switch (fn)
27925 /* Sqrt to rsqrt conversion. */
27926 case BUILT_IN_SQRTF:
27927 return ix86_builtins[IX86_BUILTIN_RSQRTF];
27929 default:
27930 return NULL_TREE;
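/* Illustrative note (a sketch): this hook returns a non-NULL builtin only
   when TARGET_SSE_MATH plus finite-only, non-trapping, unsafe math are in
   effect (e.g. under -ffast-math), so that a division by sqrtf (x) or a
   vectorized sqrtps can be replaced by the rsqrt-based approximation
   builtins named above; otherwise the exact square root must be kept.  */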
27934 /* Helper for avx_vpermilps256_operand et al. This is also used by
27935 the expansion functions to turn the parallel back into a mask.
27936 The return value is 0 for no match and the imm8+1 for a match. */
27938 int
27939 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
27941 unsigned i, nelt = GET_MODE_NUNITS (mode);
27942 unsigned mask = 0;
27943 unsigned char ipar[8];
27945 if (XVECLEN (par, 0) != (int) nelt)
27946 return 0;
27948 /* Validate that all of the elements are constants, and not totally
27949 out of range. Copy the data into an integral array to make the
27950 subsequent checks easier. */
27951 for (i = 0; i < nelt; ++i)
27953 rtx er = XVECEXP (par, 0, i);
27954 unsigned HOST_WIDE_INT ei;
27956 if (!CONST_INT_P (er))
27957 return 0;
27958 ei = INTVAL (er);
27959 if (ei >= nelt)
27960 return 0;
27961 ipar[i] = ei;
27964 switch (mode)
27966 case V4DFmode:
27967 /* In the 256-bit DFmode case, we can only move elements within
27968 a 128-bit lane. */
27969 for (i = 0; i < 2; ++i)
27971 if (ipar[i] >= 2)
27972 return 0;
27973 mask |= ipar[i] << i;
27975 for (i = 2; i < 4; ++i)
27977 if (ipar[i] < 2)
27978 return 0;
27979 mask |= (ipar[i] - 2) << i;
27981 break;
27983 case V8SFmode:
27984 /* In the 256-bit SFmode case, we have full freedom of movement
27985 within the low 128-bit lane, but the high 128-bit lane must
27986 mirror the exact same pattern. */
27987 for (i = 0; i < 4; ++i)
27988 if (ipar[i] + 4 != ipar[i + 4])
27989 return 0;
27990 nelt = 4;
27991 /* FALLTHRU */
27993 case V2DFmode:
27994 case V4SFmode:
27995 /* In the 128-bit case, we have full freedom in the placement of
27996 the elements from the source operand. */
27997 for (i = 0; i < nelt; ++i)
27998 mask |= ipar[i] << (i * (nelt / 2));
27999 break;
28001 default:
28002 gcc_unreachable ();
28005 /* Make sure success has a non-zero value by adding one. */
28006 return mask + 1;
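/* Worked example (a sketch): for a V4SFmode parallel selecting elements
   (0 1 2 3) the loop above builds mask = 0 | 1<<2 | 2<<4 | 3<<6 = 0xe4,
   the identity vpermilps immediate, and the function returns 0xe4 + 1;
   a reversed selection (3 2 1 0) gives 0x1b + 1.  A return value of 0
   always means "no match".  */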
28009 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
28010 the expansion functions to turn the parallel back into a mask.
28011 The return value is 0 for no match and the imm8+1 for a match. */
28013 int
28014 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
28016 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
28017 unsigned mask = 0;
28018 unsigned char ipar[8];
28020 if (XVECLEN (par, 0) != (int) nelt)
28021 return 0;
28023 /* Validate that all of the elements are constants, and not totally
28024 out of range. Copy the data into an integral array to make the
28025 subsequent checks easier. */
28026 for (i = 0; i < nelt; ++i)
28028 rtx er = XVECEXP (par, 0, i);
28029 unsigned HOST_WIDE_INT ei;
28031 if (!CONST_INT_P (er))
28032 return 0;
28033 ei = INTVAL (er);
28034 if (ei >= 2 * nelt)
28035 return 0;
28036 ipar[i] = ei;
28039 /* Validate that each half of the permute selects consecutive elements. */
28040 for (i = 0; i < nelt2 - 1; ++i)
28041 if (ipar[i] + 1 != ipar[i + 1])
28042 return 0;
28043 for (i = nelt2; i < nelt - 1; ++i)
28044 if (ipar[i] + 1 != ipar[i + 1])
28045 return 0;
28047 /* Reconstruct the mask. */
28048 for (i = 0; i < 2; ++i)
28050 unsigned e = ipar[i * nelt2];
28051 if (e % nelt2)
28052 return 0;
28053 e /= nelt2;
28054 mask |= e << (i * 4);
28057 /* Make sure success has a non-zero value by adding one. */
28058 return mask + 1;
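/* Illustrative sketch (editorial addition, not part of GCC): vperm2f128
   copies one of the four 128-bit lanes of the two concatenated sources
   into each half of the destination; each selection occupies a nibble of
   the immediate.  Hypothetical helper for the V8SFmode case, where
   elements 0-7 come from the first source and 8-15 from the second.  */
static int
example_vperm2f128_imm8 (const unsigned char sel[8])
{
  unsigned int imm = 0, half, i;

  for (half = 0; half < 2; half++)
    {
      unsigned int first = sel[half * 4];

      if (first % 4 != 0)		/* must start on a 128-bit lane */
	return -1;
      for (i = 1; i < 4; i++)		/* and select it contiguously */
	if (sel[half * 4 + i] != first + i)
	  return -1;
      imm |= (first / 4) << (half * 4);
    }

  return imm;	/* e.g. {4,5,6,7, 8,9,10,11} encodes as 0x21.  */
}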
28062 /* Store OPERAND to the memory after reload is completed. This means
28063 that we can't easily use assign_stack_local. */
28065 ix86_force_to_memory (enum machine_mode mode, rtx operand)
28067 rtx result;
28069 gcc_assert (reload_completed);
28070 if (ix86_using_red_zone ())
28072 result = gen_rtx_MEM (mode,
28073 gen_rtx_PLUS (Pmode,
28074 stack_pointer_rtx,
28075 GEN_INT (-RED_ZONE_SIZE)));
28076 emit_move_insn (result, operand);
28078 else if (TARGET_64BIT)
28080 switch (mode)
28082 case HImode:
28083 case SImode:
28084 operand = gen_lowpart (DImode, operand);
28085 /* FALLTHRU */
28086 case DImode:
28087 emit_insn (
28088 gen_rtx_SET (VOIDmode,
28089 gen_rtx_MEM (DImode,
28090 gen_rtx_PRE_DEC (DImode,
28091 stack_pointer_rtx)),
28092 operand));
28093 break;
28094 default:
28095 gcc_unreachable ();
28097 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28099 else
28101 switch (mode)
28103 case DImode:
28105 rtx operands[2];
28106 split_double_mode (mode, &operand, 1, operands, operands + 1);
28107 emit_insn (
28108 gen_rtx_SET (VOIDmode,
28109 gen_rtx_MEM (SImode,
28110 gen_rtx_PRE_DEC (Pmode,
28111 stack_pointer_rtx)),
28112 operands[1]));
28113 emit_insn (
28114 gen_rtx_SET (VOIDmode,
28115 gen_rtx_MEM (SImode,
28116 gen_rtx_PRE_DEC (Pmode,
28117 stack_pointer_rtx)),
28118 operands[0]));
28120 break;
28121 case HImode:
28122 /* Store HImodes as SImodes. */
28123 operand = gen_lowpart (SImode, operand);
28124 /* FALLTHRU */
28125 case SImode:
28126 emit_insn (
28127 gen_rtx_SET (VOIDmode,
28128 gen_rtx_MEM (GET_MODE (operand),
28129 gen_rtx_PRE_DEC (SImode,
28130 stack_pointer_rtx)),
28131 operand));
28132 break;
28133 default:
28134 gcc_unreachable ();
28136 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28138 return result;
28141 /* Free the stack slot used by ix86_force_to_memory. */
28142 void
28143 ix86_free_from_memory (enum machine_mode mode)
28145 if (!ix86_using_red_zone ())
28147 int size;
28149 if (mode == DImode || TARGET_64BIT)
28150 size = 8;
28151 else
28152 size = 4;
28153 /* Use LEA to deallocate stack space. In peephole2 it will be converted
28154 to a pop or add instruction if registers are available. */
28155 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
28156 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
28157 GEN_INT (size))));
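/* Illustrative example (editorial addition, not part of GCC): with the
   64-bit red zone available, ix86_force_to_memory simply stores below the
   stack pointer, roughly
	movq	%rax, -128(%rsp)
   and ix86_free_from_memory then has nothing to deallocate.  Without a
   red zone the operand is pushed instead, e.g.
	pushq	%rax
   and ix86_free_from_memory later releases the slot by adjusting the
   stack pointer (8 bytes, or 4 bytes for 32-bit modes on ia32) with an
   lea/add.  The exact instructions depend on the mode and target.  */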
28161 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
28162 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
28163 same. */
28164 static const reg_class_t *
28165 i386_ira_cover_classes (void)
28167 static const reg_class_t sse_fpmath_classes[] = {
28168 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
28170 static const reg_class_t no_sse_fpmath_classes[] = {
28171 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
28174 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
28177 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
28179 Put float CONST_DOUBLE in the constant pool instead of fp regs.
28180 QImode must go into class Q_REGS.
28181 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
28182 movdf to do mem-to-mem moves through integer regs. */
28184 static reg_class_t
28185 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
28187 enum machine_mode mode = GET_MODE (x);
28189 /* We're only allowed to return a subclass of CLASS. Many of the
28190 following checks fail for NO_REGS, so eliminate that early. */
28191 if (regclass == NO_REGS)
28192 return NO_REGS;
28194 /* All classes can load zeros. */
28195 if (x == CONST0_RTX (mode))
28196 return regclass;
28198 /* Force constants into memory if we are loading a (nonzero) constant into
28199 an MMX or SSE register. This is because there are no MMX/SSE instructions
28200 to load from a constant. */
28201 if (CONSTANT_P (x)
28202 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
28203 return NO_REGS;
28205 /* Prefer SSE regs only, if we can use them for math. */
28206 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
28207 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
28209 /* Floating-point constants need more complex checks. */
28210 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
28212 /* General regs can load everything. */
28213 if (reg_class_subset_p (regclass, GENERAL_REGS))
28214 return regclass;
28216 /* Floats can load 0 and 1 plus some others. Note that we eliminated
28217 zero above. We only want to wind up preferring 80387 registers if
28218 we plan on doing computation with them. */
28219 if (TARGET_80387
28220 && standard_80387_constant_p (x))
28222 /* Limit class to non-sse. */
28223 if (regclass == FLOAT_SSE_REGS)
28224 return FLOAT_REGS;
28225 if (regclass == FP_TOP_SSE_REGS)
28226 return FP_TOP_REG;
28227 if (regclass == FP_SECOND_SSE_REGS)
28228 return FP_SECOND_REG;
28229 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
28230 return regclass;
28233 return NO_REGS;
28236 /* Generally when we see PLUS here, it's the function invariant
28237 (plus soft-fp const_int), which can only be computed into general
28238 regs. */
28239 if (GET_CODE (x) == PLUS)
28240 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
28242 /* QImode constants are easy to load, but non-constant QImode data
28243 must go into Q_REGS. */
28244 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
28246 if (reg_class_subset_p (regclass, Q_REGS))
28247 return regclass;
28248 if (reg_class_subset_p (Q_REGS, regclass))
28249 return Q_REGS;
28250 return NO_REGS;
28253 return regclass;
28256 /* Discourage putting floating-point values in SSE registers unless
28257 SSE math is being used, and likewise for the 387 registers. */
28258 static reg_class_t
28259 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
28261 enum machine_mode mode = GET_MODE (x);
28263 /* Restrict the output reload class to the register bank that we are doing
28264 math on. If we would like not to return a subset of CLASS, reject this
28265 alternative: if reload cannot do this, it will still use its choice. */
28266 mode = GET_MODE (x);
28267 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
28268 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
28270 if (X87_FLOAT_MODE_P (mode))
28272 if (regclass == FP_TOP_SSE_REGS)
28273 return FP_TOP_REG;
28274 else if (regclass == FP_SECOND_SSE_REGS)
28275 return FP_SECOND_REG;
28276 else
28277 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
28280 return regclass;
28283 static reg_class_t
28284 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
28285 enum machine_mode mode,
28286 secondary_reload_info *sri ATTRIBUTE_UNUSED)
28288 /* QImode spills from non-QI registers require an
28289 intermediate register on 32-bit targets. */
28290 if (!in_p && mode == QImode && !TARGET_64BIT
28291 && (rclass == GENERAL_REGS
28292 || rclass == LEGACY_REGS
28293 || rclass == INDEX_REGS))
28295 int regno;
28297 if (REG_P (x))
28298 regno = REGNO (x);
28299 else
28300 regno = -1;
28302 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
28303 regno = true_regnum (x);
28305 /* Return Q_REGS if the operand is in memory. */
28306 if (regno == -1)
28307 return Q_REGS;
28310 return NO_REGS;
28313 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
28315 static bool
28316 ix86_class_likely_spilled_p (reg_class_t rclass)
28318 switch (rclass)
28320 case AREG:
28321 case DREG:
28322 case CREG:
28323 case BREG:
28324 case AD_REGS:
28325 case SIREG:
28326 case DIREG:
28327 case SSE_FIRST_REG:
28328 case FP_TOP_REG:
28329 case FP_SECOND_REG:
28330 return true;
28332 default:
28333 break;
28336 return false;
28339 /* If we are copying between general and FP registers, we need a memory
28340 location. The same is true for SSE and MMX registers.
28342 To optimize register_move_cost performance, allow inline variant.
28344 The macro can't work reliably when one of the CLASSES is a class containing
28345 registers from multiple units (SSE, MMX, integer). We avoid this by never
28346 combining those units in a single alternative in the machine description.
28347 Ensure that this constraint holds to avoid unexpected surprises.
28349 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
28350 enforce these sanity checks. */
28352 static inline bool
28353 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28354 enum machine_mode mode, int strict)
28356 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
28357 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
28358 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
28359 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
28360 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
28361 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
28363 gcc_assert (!strict);
28364 return true;
28367 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
28368 return true;
28370 /* ??? This is a lie. We do have moves between mmx/general, and for
28371 mmx/sse2. But by saying we need secondary memory we discourage the
28372 register allocator from using the mmx registers unless needed. */
28373 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
28374 return true;
28376 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28378 /* SSE1 doesn't have any direct moves from other classes. */
28379 if (!TARGET_SSE2)
28380 return true;
28382 /* If the target says that inter-unit moves are more expensive
28383 than moving through memory, then don't generate them. */
28384 if (!TARGET_INTER_UNIT_MOVES)
28385 return true;
28387 /* Between SSE and general, we have moves no larger than word size. */
28388 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
28389 return true;
28392 return false;
28395 bool
28396 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28397 enum machine_mode mode, int strict)
28399 return inline_secondary_memory_needed (class1, class2, mode, strict);
28402 /* Return true if the registers in CLASS cannot represent the change from
28403 modes FROM to TO. */
28405 bool
28406 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
28407 enum reg_class regclass)
28409 if (from == to)
28410 return false;
28412 /* x87 registers can't do subreg at all, as all values are reformatted
28413 to extended precision. */
28414 if (MAYBE_FLOAT_CLASS_P (regclass))
28415 return true;
28417 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
28419 /* Vector registers do not support QI or HImode loads. If we don't
28420 disallow a change to these modes, reload will assume it's ok to
28421 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
28422 the vec_dupv4hi pattern. */
28423 if (GET_MODE_SIZE (from) < 4)
28424 return true;
28426 /* Vector registers do not support subreg with nonzero offsets, which
28427 are otherwise valid for integer registers. Since we can't see
28428 whether we have a nonzero offset from here, prohibit all
28429 nonparadoxical subregs changing size. */
28430 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
28431 return true;
28434 return false;
28437 /* Return the cost of moving data of mode M between a
28438 register and memory. A value of 2 is the default; this cost is
28439 relative to those in `REGISTER_MOVE_COST'.
28441 This function is used extensively by register_move_cost that is used to
28442 build tables at startup. Make it inline in this case.
28443 When IN is 2, return maximum of in and out move cost.
28445 If moving between registers and memory is more expensive than
28446 between two registers, you should define this macro to express the
28447 relative cost.
28449 Also model the increased cost of moving QImode registers in non
28450 Q_REGS classes. */
28452 static inline int
28453 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
28454 int in)
28456 int cost;
28457 if (FLOAT_CLASS_P (regclass))
28459 int index;
28460 switch (mode)
28462 case SFmode:
28463 index = 0;
28464 break;
28465 case DFmode:
28466 index = 1;
28467 break;
28468 case XFmode:
28469 index = 2;
28470 break;
28471 default:
28472 return 100;
28474 if (in == 2)
28475 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
28476 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
28478 if (SSE_CLASS_P (regclass))
28480 int index;
28481 switch (GET_MODE_SIZE (mode))
28483 case 4:
28484 index = 0;
28485 break;
28486 case 8:
28487 index = 1;
28488 break;
28489 case 16:
28490 index = 2;
28491 break;
28492 default:
28493 return 100;
28495 if (in == 2)
28496 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
28497 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
28499 if (MMX_CLASS_P (regclass))
28501 int index;
28502 switch (GET_MODE_SIZE (mode))
28504 case 4:
28505 index = 0;
28506 break;
28507 case 8:
28508 index = 1;
28509 break;
28510 default:
28511 return 100;
28513 if (in == 2)
28514 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
28515 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
28517 switch (GET_MODE_SIZE (mode))
28519 case 1:
28520 if (Q_CLASS_P (regclass) || TARGET_64BIT)
28522 if (!in)
28523 return ix86_cost->int_store[0];
28524 if (TARGET_PARTIAL_REG_DEPENDENCY
28525 && optimize_function_for_speed_p (cfun))
28526 cost = ix86_cost->movzbl_load;
28527 else
28528 cost = ix86_cost->int_load[0];
28529 if (in == 2)
28530 return MAX (cost, ix86_cost->int_store[0]);
28531 return cost;
28533 else
28535 if (in == 2)
28536 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
28537 if (in)
28538 return ix86_cost->movzbl_load;
28539 else
28540 return ix86_cost->int_store[0] + 4;
28542 break;
28543 case 2:
28544 if (in == 2)
28545 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
28546 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
28547 default:
28548 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
28549 if (mode == TFmode)
28550 mode = XFmode;
28551 if (in == 2)
28552 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
28553 else if (in)
28554 cost = ix86_cost->int_load[2];
28555 else
28556 cost = ix86_cost->int_store[2];
28557 return (cost * (((int) GET_MODE_SIZE (mode)
28558 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
28562 static int
28563 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
28564 bool in)
28566 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
28570 /* Return the cost of moving data from a register in class CLASS1 to
28571 one in class CLASS2.
28573 It is not required that the cost always equal 2 when FROM is the same as TO;
28574 on some machines it is expensive to move between registers if they are not
28575 general registers. */
28577 static int
28578 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
28579 reg_class_t class2_i)
28581 enum reg_class class1 = (enum reg_class) class1_i;
28582 enum reg_class class2 = (enum reg_class) class2_i;
28584 /* In case we require secondary memory, compute cost of the store followed
28585 by load. In order to avoid bad register allocation choices, we need
28586 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
28588 if (inline_secondary_memory_needed (class1, class2, mode, 0))
28590 int cost = 1;
28592 cost += inline_memory_move_cost (mode, class1, 2);
28593 cost += inline_memory_move_cost (mode, class2, 2);
28595 /* In case of copying from a general purpose register we may emit multiple
28596 stores followed by a single load, causing a memory size mismatch stall.
28597 Count this as an arbitrarily high cost of 20. */
28598 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
28599 cost += 20;
28601 /* In the case of FP/MMX moves, the registers actually overlap, and we
28602 have to switch modes in order to treat them differently. */
28603 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
28604 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
28605 cost += 20;
28607 return cost;
28610 /* Moves between SSE/MMX and integer unit are expensive. */
28611 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
28612 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28614 /* ??? By keeping the returned value relatively high, we limit the number
28615 of moves between integer and MMX/SSE registers for all targets.
28616 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
28617 where integer modes in MMX/SSE registers are not tieable
28618 because of missing QImode and HImode moves to, from or between
28619 MMX/SSE registers. */
28620 return MAX (8, ix86_cost->mmxsse_to_integer);
28622 if (MAYBE_FLOAT_CLASS_P (class1))
28623 return ix86_cost->fp_move;
28624 if (MAYBE_SSE_CLASS_P (class1))
28625 return ix86_cost->sse_move;
28626 if (MAYBE_MMX_CLASS_P (class1))
28627 return ix86_cost->mmx_move;
28628 return 2;
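/* Illustrative example (editorial addition, not part of GCC): a DFmode
   copy between FLOAT_REGS and SSE_REGS needs secondary memory, so the
   code above prices it roughly as
	1 + MAX (fp_load[1], fp_store[1]) + MAX (sse_load[1], sse_store[1])
   from the active cost table, while a word-sized GPR<->SSE move that is
   allowed directly costs MAX (8, mmxsse_to_integer).  The concrete
   numbers depend on the selected -mtune cost table.  */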
28631 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
28633 bool
28634 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
28636 /* Flags, and only flags, can hold CCmode values. */
28637 if (CC_REGNO_P (regno))
28638 return GET_MODE_CLASS (mode) == MODE_CC;
28639 if (GET_MODE_CLASS (mode) == MODE_CC
28640 || GET_MODE_CLASS (mode) == MODE_RANDOM
28641 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
28642 return 0;
28643 if (FP_REGNO_P (regno))
28644 return VALID_FP_MODE_P (mode);
28645 if (SSE_REGNO_P (regno))
28647 /* We implement the move patterns for all vector modes into and
28648 out of SSE registers, even when no operation instructions
28649 are available. OImode move is available only when AVX is
28650 enabled. */
28651 return ((TARGET_AVX && mode == OImode)
28652 || VALID_AVX256_REG_MODE (mode)
28653 || VALID_SSE_REG_MODE (mode)
28654 || VALID_SSE2_REG_MODE (mode)
28655 || VALID_MMX_REG_MODE (mode)
28656 || VALID_MMX_REG_MODE_3DNOW (mode));
28658 if (MMX_REGNO_P (regno))
28660 /* We implement the move patterns for 3DNOW modes even in MMX mode,
28661 so if the register is available at all, then we can move data of
28662 the given mode into or out of it. */
28663 return (VALID_MMX_REG_MODE (mode)
28664 || VALID_MMX_REG_MODE_3DNOW (mode));
28667 if (mode == QImode)
28669 /* Take care with QImode values - they can be in non-QI regs,
28670 but then they do cause partial register stalls. */
28671 if (regno <= BX_REG || TARGET_64BIT)
28672 return 1;
28673 if (!TARGET_PARTIAL_REG_STALL)
28674 return 1;
28675 return reload_in_progress || reload_completed;
28677 /* We handle both integer and floats in the general purpose registers. */
28678 else if (VALID_INT_MODE_P (mode))
28679 return 1;
28680 else if (VALID_FP_MODE_P (mode))
28681 return 1;
28682 else if (VALID_DFP_MODE_P (mode))
28683 return 1;
28684 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
28685 on to use that value in smaller contexts, this can easily force a
28686 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
28687 supporting DImode, allow it. */
28688 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
28689 return 1;
28691 return 0;
28694 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
28695 tieable integer mode. */
28697 static bool
28698 ix86_tieable_integer_mode_p (enum machine_mode mode)
28700 switch (mode)
28702 case HImode:
28703 case SImode:
28704 return true;
28706 case QImode:
28707 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
28709 case DImode:
28710 return TARGET_64BIT;
28712 default:
28713 return false;
28717 /* Return true if MODE1 is accessible in a register that can hold MODE2
28718 without copying. That is, all register classes that can hold MODE2
28719 can also hold MODE1. */
28721 bool
28722 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
28724 if (mode1 == mode2)
28725 return true;
28727 if (ix86_tieable_integer_mode_p (mode1)
28728 && ix86_tieable_integer_mode_p (mode2))
28729 return true;
28731 /* MODE2 being XFmode implies fp stack or general regs, which means we
28732 can tie any smaller floating point modes to it. Note that we do not
28733 tie this with TFmode. */
28734 if (mode2 == XFmode)
28735 return mode1 == SFmode || mode1 == DFmode;
28737 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
28738 that we can tie it with SFmode. */
28739 if (mode2 == DFmode)
28740 return mode1 == SFmode;
28742 /* If MODE2 is only appropriate for an SSE register, then tie with
28743 any other mode acceptable to SSE registers. */
28744 if (GET_MODE_SIZE (mode2) == 16
28745 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
28746 return (GET_MODE_SIZE (mode1) == 16
28747 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
28749 /* If MODE2 is appropriate for an MMX register, then tie
28750 with any other mode acceptable to MMX registers. */
28751 if (GET_MODE_SIZE (mode2) == 8
28752 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
28753 return (GET_MODE_SIZE (mode1) == 8
28754 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
28756 return false;
28759 /* Compute a (partial) cost for rtx X. Return true if the complete
28760 cost has been computed, and false if subexpressions should be
28761 scanned. In either case, *TOTAL contains the cost result. */
28763 static bool
28764 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
28766 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
28767 enum machine_mode mode = GET_MODE (x);
28768 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
28770 switch (code)
28772 case CONST_INT:
28773 case CONST:
28774 case LABEL_REF:
28775 case SYMBOL_REF:
28776 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
28777 *total = 3;
28778 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
28779 *total = 2;
28780 else if (flag_pic && SYMBOLIC_CONST (x)
28781 && (!TARGET_64BIT
28782 || (GET_CODE (x) != LABEL_REF
28783 && (GET_CODE (x) != SYMBOL_REF
28784 || !SYMBOL_REF_LOCAL_P (x)))))
28785 *total = 1;
28786 else
28787 *total = 0;
28788 return true;
28790 case CONST_DOUBLE:
28791 if (mode == VOIDmode)
28792 *total = 0;
28793 else
28794 switch (standard_80387_constant_p (x))
28796 case 1: /* 0.0 */
28797 *total = 1;
28798 break;
28799 default: /* Other constants */
28800 *total = 2;
28801 break;
28802 case 0:
28803 case -1:
28804 /* Start with (MEM (SYMBOL_REF)), since that's where
28805 it'll probably end up. Add a penalty for size. */
28806 *total = (COSTS_N_INSNS (1)
28807 + (flag_pic != 0 && !TARGET_64BIT)
28808 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
28809 break;
28811 return true;
28813 case ZERO_EXTEND:
28814 /* The zero extension is often completely free on x86_64, so make
28815 it as cheap as possible. */
28816 if (TARGET_64BIT && mode == DImode
28817 && GET_MODE (XEXP (x, 0)) == SImode)
28818 *total = 1;
28819 else if (TARGET_ZERO_EXTEND_WITH_AND)
28820 *total = cost->add;
28821 else
28822 *total = cost->movzx;
28823 return false;
28825 case SIGN_EXTEND:
28826 *total = cost->movsx;
28827 return false;
28829 case ASHIFT:
28830 if (CONST_INT_P (XEXP (x, 1))
28831 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
28833 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28834 if (value == 1)
28836 *total = cost->add;
28837 return false;
28839 if ((value == 2 || value == 3)
28840 && cost->lea <= cost->shift_const)
28842 *total = cost->lea;
28843 return false;
28846 /* FALLTHRU */
28848 case ROTATE:
28849 case ASHIFTRT:
28850 case LSHIFTRT:
28851 case ROTATERT:
28852 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
28854 if (CONST_INT_P (XEXP (x, 1)))
28856 if (INTVAL (XEXP (x, 1)) > 32)
28857 *total = cost->shift_const + COSTS_N_INSNS (2);
28858 else
28859 *total = cost->shift_const * 2;
28861 else
28863 if (GET_CODE (XEXP (x, 1)) == AND)
28864 *total = cost->shift_var * 2;
28865 else
28866 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
28869 else
28871 if (CONST_INT_P (XEXP (x, 1)))
28872 *total = cost->shift_const;
28873 else
28874 *total = cost->shift_var;
28876 return false;
28878 case FMA:
28880 rtx sub;
28882 gcc_assert (FLOAT_MODE_P (mode));
28883 gcc_assert (TARGET_FMA || TARGET_FMA4);
28885 /* ??? SSE scalar/vector cost should be used here. */
28886 /* ??? Bald assumption that fma has the same cost as fmul. */
28887 *total = cost->fmul;
28888 *total += rtx_cost (XEXP (x, 1), FMA, speed);
28890 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
28891 sub = XEXP (x, 0);
28892 if (GET_CODE (sub) == NEG)
28893 sub = XEXP (sub, 0);
28894 *total += rtx_cost (sub, FMA, speed);
28896 sub = XEXP (x, 2);
28897 if (GET_CODE (sub) == NEG)
28898 sub = XEXP (sub, 0);
28899 *total += rtx_cost (sub, FMA, speed);
28900 return true;
28903 case MULT:
28904 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28906 /* ??? SSE scalar cost should be used here. */
28907 *total = cost->fmul;
28908 return false;
28910 else if (X87_FLOAT_MODE_P (mode))
28912 *total = cost->fmul;
28913 return false;
28915 else if (FLOAT_MODE_P (mode))
28917 /* ??? SSE vector cost should be used here. */
28918 *total = cost->fmul;
28919 return false;
28921 else
28923 rtx op0 = XEXP (x, 0);
28924 rtx op1 = XEXP (x, 1);
28925 int nbits;
28926 if (CONST_INT_P (XEXP (x, 1)))
28928 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28929 for (nbits = 0; value != 0; value &= value - 1)
28930 nbits++;
28932 else
28933 /* This is arbitrary. */
28934 nbits = 7;
28936 /* Compute costs correctly for widening multiplication. */
28937 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
28938 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
28939 == GET_MODE_SIZE (mode))
28941 int is_mulwiden = 0;
28942 enum machine_mode inner_mode = GET_MODE (op0);
28944 if (GET_CODE (op0) == GET_CODE (op1))
28945 is_mulwiden = 1, op1 = XEXP (op1, 0);
28946 else if (CONST_INT_P (op1))
28948 if (GET_CODE (op0) == SIGN_EXTEND)
28949 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
28950 == INTVAL (op1);
28951 else
28952 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
28955 if (is_mulwiden)
28956 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
28959 *total = (cost->mult_init[MODE_INDEX (mode)]
28960 + nbits * cost->mult_bit
28961 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
28963 return true;
28966 case DIV:
28967 case UDIV:
28968 case MOD:
28969 case UMOD:
28970 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28971 /* ??? SSE cost should be used here. */
28972 *total = cost->fdiv;
28973 else if (X87_FLOAT_MODE_P (mode))
28974 *total = cost->fdiv;
28975 else if (FLOAT_MODE_P (mode))
28976 /* ??? SSE vector cost should be used here. */
28977 *total = cost->fdiv;
28978 else
28979 *total = cost->divide[MODE_INDEX (mode)];
28980 return false;
28982 case PLUS:
28983 if (GET_MODE_CLASS (mode) == MODE_INT
28984 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
28986 if (GET_CODE (XEXP (x, 0)) == PLUS
28987 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
28988 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
28989 && CONSTANT_P (XEXP (x, 1)))
28991 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
28992 if (val == 2 || val == 4 || val == 8)
28994 *total = cost->lea;
28995 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28996 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
28997 outer_code, speed);
28998 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28999 return true;
29002 else if (GET_CODE (XEXP (x, 0)) == MULT
29003 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
29005 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
29006 if (val == 2 || val == 4 || val == 8)
29008 *total = cost->lea;
29009 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
29010 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29011 return true;
29014 else if (GET_CODE (XEXP (x, 0)) == PLUS)
29016 *total = cost->lea;
29017 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
29018 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
29019 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29020 return true;
29023 /* FALLTHRU */
29025 case MINUS:
29026 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29028 /* ??? SSE cost should be used here. */
29029 *total = cost->fadd;
29030 return false;
29032 else if (X87_FLOAT_MODE_P (mode))
29034 *total = cost->fadd;
29035 return false;
29037 else if (FLOAT_MODE_P (mode))
29039 /* ??? SSE vector cost should be used here. */
29040 *total = cost->fadd;
29041 return false;
29043 /* FALLTHRU */
29045 case AND:
29046 case IOR:
29047 case XOR:
29048 if (!TARGET_64BIT && mode == DImode)
29050 *total = (cost->add * 2
29051 + (rtx_cost (XEXP (x, 0), outer_code, speed)
29052 << (GET_MODE (XEXP (x, 0)) != DImode))
29053 + (rtx_cost (XEXP (x, 1), outer_code, speed)
29054 << (GET_MODE (XEXP (x, 1)) != DImode)));
29055 return true;
29057 /* FALLTHRU */
29059 case NEG:
29060 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29062 /* ??? SSE cost should be used here. */
29063 *total = cost->fchs;
29064 return false;
29066 else if (X87_FLOAT_MODE_P (mode))
29068 *total = cost->fchs;
29069 return false;
29071 else if (FLOAT_MODE_P (mode))
29073 /* ??? SSE vector cost should be used here. */
29074 *total = cost->fchs;
29075 return false;
29077 /* FALLTHRU */
29079 case NOT:
29080 if (!TARGET_64BIT && mode == DImode)
29081 *total = cost->add * 2;
29082 else
29083 *total = cost->add;
29084 return false;
29086 case COMPARE:
29087 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
29088 && XEXP (XEXP (x, 0), 1) == const1_rtx
29089 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
29090 && XEXP (x, 1) == const0_rtx)
29092 /* This kind of construct is implemented using test[bwl].
29093 Treat it as if we had an AND. */
29094 *total = (cost->add
29095 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
29096 + rtx_cost (const1_rtx, outer_code, speed));
29097 return true;
29099 return false;
29101 case FLOAT_EXTEND:
29102 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
29103 *total = 0;
29104 return false;
29106 case ABS:
29107 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29108 /* ??? SSE cost should be used here. */
29109 *total = cost->fabs;
29110 else if (X87_FLOAT_MODE_P (mode))
29111 *total = cost->fabs;
29112 else if (FLOAT_MODE_P (mode))
29113 /* ??? SSE vector cost should be used here. */
29114 *total = cost->fabs;
29115 return false;
29117 case SQRT:
29118 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29119 /* ??? SSE cost should be used here. */
29120 *total = cost->fsqrt;
29121 else if (X87_FLOAT_MODE_P (mode))
29122 *total = cost->fsqrt;
29123 else if (FLOAT_MODE_P (mode))
29124 /* ??? SSE vector cost should be used here. */
29125 *total = cost->fsqrt;
29126 return false;
29128 case UNSPEC:
29129 if (XINT (x, 1) == UNSPEC_TP)
29130 *total = 0;
29131 return false;
29133 case VEC_SELECT:
29134 case VEC_CONCAT:
29135 case VEC_MERGE:
29136 case VEC_DUPLICATE:
29137 /* ??? Assume all of these vector manipulation patterns are
29138 recognizable, in which case they all pretty much have the
29139 same cost. */
29140 *total = COSTS_N_INSNS (1);
29141 return true;
29143 default:
29144 return false;
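/* Illustrative sketch (editorial addition, not part of GCC): the MULT
   case above estimates the cost of multiplying by a constant from the
   number of set bits, using the classic clear-lowest-set-bit loop.  A
   standalone equivalent of that loop:  */
static int
example_set_bit_count (unsigned long long value)
{
  int nbits = 0;

  while (value != 0)
    {
      value &= value - 1;	/* clear the lowest set bit */
      nbits++;
    }
  return nbits;			/* e.g. example_set_bit_count (10) == 2 */
}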
29148 #if TARGET_MACHO
29150 static int current_machopic_label_num;
29152 /* Given a symbol name and its associated stub, write out the
29153 definition of the stub. */
29155 void
29156 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29158 unsigned int length;
29159 char *binder_name, *symbol_name, lazy_ptr_name[32];
29160 int label = ++current_machopic_label_num;
29162 /* For 64-bit we shouldn't get here. */
29163 gcc_assert (!TARGET_64BIT);
29165 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29166 symb = targetm.strip_name_encoding (symb);
29168 length = strlen (stub);
29169 binder_name = XALLOCAVEC (char, length + 32);
29170 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
29172 length = strlen (symb);
29173 symbol_name = XALLOCAVEC (char, length + 32);
29174 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29176 sprintf (lazy_ptr_name, "L%d$lz", label);
29178 if (MACHOPIC_ATT_STUB)
29179 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
29180 else if (MACHOPIC_PURE)
29182 if (TARGET_DEEP_BRANCH_PREDICTION)
29183 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
29184 else
29185 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
29187 else
29188 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
29190 fprintf (file, "%s:\n", stub);
29191 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29193 if (MACHOPIC_ATT_STUB)
29195 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
29197 else if (MACHOPIC_PURE)
29199 /* PIC stub. */
29200 if (TARGET_DEEP_BRANCH_PREDICTION)
29202 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29203 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
29204 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
29205 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label);
29207 else
29209 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */
29210 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label);
29211 fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label);
29213 fprintf (file, "\tjmp\t*%%ecx\n");
29215 else
29216 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
29218 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
29219 it needs no stub-binding-helper. */
29220 if (MACHOPIC_ATT_STUB)
29221 return;
29223 fprintf (file, "%s:\n", binder_name);
29225 if (MACHOPIC_PURE)
29227 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
29228 fprintf (file, "\tpushl\t%%ecx\n");
29230 else
29231 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
29233 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
29235 /* N.B. Keep the correspondence of these
29236 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
29237 old-pic/new-pic/non-pic stubs; altering this will break
29238 compatibility with existing dylibs. */
29239 if (MACHOPIC_PURE)
29241 /* PIC stubs. */
29242 if (TARGET_DEEP_BRANCH_PREDICTION)
29243 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29244 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
29245 else
29246 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ebx". */
29247 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
29249 else
29250 /* 16-byte -mdynamic-no-pic stub. */
29251 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
29253 fprintf (file, "%s:\n", lazy_ptr_name);
29254 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29255 fprintf (file, ASM_LONG "%s\n", binder_name);
29257 #endif /* TARGET_MACHO */
29259 /* Order the registers for the register allocator. */
29261 void
29262 x86_order_regs_for_local_alloc (void)
29264 int pos = 0;
29265 int i;
29267 /* First allocate the local general purpose registers. */
29268 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29269 if (GENERAL_REGNO_P (i) && call_used_regs[i])
29270 reg_alloc_order [pos++] = i;
29272 /* Global general purpose registers. */
29273 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29274 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
29275 reg_alloc_order [pos++] = i;
29277 /* x87 registers come first in case we are doing FP math
29278 using them. */
29279 if (!TARGET_SSE_MATH)
29280 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29281 reg_alloc_order [pos++] = i;
29283 /* SSE registers. */
29284 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
29285 reg_alloc_order [pos++] = i;
29286 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
29287 reg_alloc_order [pos++] = i;
29289 /* x87 registers. */
29290 if (TARGET_SSE_MATH)
29291 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29292 reg_alloc_order [pos++] = i;
29294 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
29295 reg_alloc_order [pos++] = i;
29297 /* Initialize the rest of the array, as some registers are never
29298 allocated at all. */
29299 while (pos < FIRST_PSEUDO_REGISTER)
29300 reg_alloc_order [pos++] = 0;
29303 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
29304 in struct attribute_spec.handler. */
29305 static tree
29306 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
29307 tree args,
29308 int flags ATTRIBUTE_UNUSED,
29309 bool *no_add_attrs)
29311 if (TREE_CODE (*node) != FUNCTION_TYPE
29312 && TREE_CODE (*node) != METHOD_TYPE
29313 && TREE_CODE (*node) != FIELD_DECL
29314 && TREE_CODE (*node) != TYPE_DECL)
29316 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29317 name);
29318 *no_add_attrs = true;
29319 return NULL_TREE;
29321 if (TARGET_64BIT)
29323 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
29324 name);
29325 *no_add_attrs = true;
29326 return NULL_TREE;
29328 if (is_attribute_p ("callee_pop_aggregate_return", name))
29330 tree cst;
29332 cst = TREE_VALUE (args);
29333 if (TREE_CODE (cst) != INTEGER_CST)
29335 warning (OPT_Wattributes,
29336 "%qE attribute requires an integer constant argument",
29337 name);
29338 *no_add_attrs = true;
29340 else if (compare_tree_int (cst, 0) != 0
29341 && compare_tree_int (cst, 1) != 0)
29343 warning (OPT_Wattributes,
29344 "argument to %qE attribute is neither zero, nor one",
29345 name);
29346 *no_add_attrs = true;
29349 return NULL_TREE;
29352 return NULL_TREE;
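/* Illustrative usage (editorial addition, not part of GCC): the attribute
   is accepted only on 32-bit targets, only on function types, and its
   single argument must be 0 or 1, controlling whether the callee pops the
   hidden aggregate-return pointer.  */
struct example_big { int x[4]; };
struct example_big __attribute__ ((callee_pop_aggregate_return (1)))
  example_callee_pops (void);
struct example_big __attribute__ ((callee_pop_aggregate_return (0)))
  example_caller_pops (void);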
29355 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
29356 struct attribute_spec.handler. */
29357 static tree
29358 ix86_handle_abi_attribute (tree *node, tree name,
29359 tree args ATTRIBUTE_UNUSED,
29360 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29362 if (TREE_CODE (*node) != FUNCTION_TYPE
29363 && TREE_CODE (*node) != METHOD_TYPE
29364 && TREE_CODE (*node) != FIELD_DECL
29365 && TREE_CODE (*node) != TYPE_DECL)
29367 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29368 name);
29369 *no_add_attrs = true;
29370 return NULL_TREE;
29372 if (!TARGET_64BIT)
29374 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
29375 name);
29376 *no_add_attrs = true;
29377 return NULL_TREE;
29380 /* Can combine regparm with all attributes but fastcall. */
29381 if (is_attribute_p ("ms_abi", name))
29383 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
29385 error ("ms_abi and sysv_abi attributes are not compatible");
29388 return NULL_TREE;
29390 else if (is_attribute_p ("sysv_abi", name))
29392 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
29394 error ("ms_abi and sysv_abi attributes are not compatible");
29397 return NULL_TREE;
29400 return NULL_TREE;
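/* Illustrative usage (editorial addition, not part of GCC): the ABI
   attributes are accepted only for 64-bit targets and are mutually
   exclusive on the same function type.  */
void __attribute__ ((ms_abi)) example_win64_callee (int, double);
void __attribute__ ((sysv_abi)) example_sysv_callee (int, double);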
29403 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29404 struct attribute_spec.handler. */
29405 static tree
29406 ix86_handle_struct_attribute (tree *node, tree name,
29407 tree args ATTRIBUTE_UNUSED,
29408 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29410 tree *type = NULL;
29411 if (DECL_P (*node))
29413 if (TREE_CODE (*node) == TYPE_DECL)
29414 type = &TREE_TYPE (*node);
29416 else
29417 type = node;
29419 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29420 || TREE_CODE (*type) == UNION_TYPE)))
29422 warning (OPT_Wattributes, "%qE attribute ignored",
29423 name);
29424 *no_add_attrs = true;
29427 else if ((is_attribute_p ("ms_struct", name)
29428 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29429 || ((is_attribute_p ("gcc_struct", name)
29430 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29432 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29433 name);
29434 *no_add_attrs = true;
29437 return NULL_TREE;
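/* Illustrative usage (editorial addition, not part of GCC): the layout
   attributes apply to struct and union types; combining the two on one
   type is rejected above.  */
struct __attribute__ ((ms_struct)) example_ms_layout { char c; int b : 7; };
struct __attribute__ ((gcc_struct)) example_gcc_layout { char c; int b : 7; };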
29440 static tree
29441 ix86_handle_fndecl_attribute (tree *node, tree name,
29442 tree args ATTRIBUTE_UNUSED,
29443 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29445 if (TREE_CODE (*node) != FUNCTION_DECL)
29447 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29448 name);
29449 *no_add_attrs = true;
29451 return NULL_TREE;
29454 static bool
29455 ix86_ms_bitfield_layout_p (const_tree record_type)
29457 return ((TARGET_MS_BITFIELD_LAYOUT
29458 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29459 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
29462 /* Returns an expression indicating where the this parameter is
29463 located on entry to the FUNCTION. */
29465 static rtx
29466 x86_this_parameter (tree function)
29468 tree type = TREE_TYPE (function);
29469 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
29470 int nregs;
29472 if (TARGET_64BIT)
29474 const int *parm_regs;
29476 if (ix86_function_type_abi (type) == MS_ABI)
29477 parm_regs = x86_64_ms_abi_int_parameter_registers;
29478 else
29479 parm_regs = x86_64_int_parameter_registers;
29480 return gen_rtx_REG (DImode, parm_regs[aggr]);
29483 nregs = ix86_function_regparm (type, function);
29485 if (nregs > 0 && !stdarg_p (type))
29487 int regno;
29489 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
29490 regno = aggr ? DX_REG : CX_REG;
29491 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
29493 regno = CX_REG;
29494 if (aggr)
29495 return gen_rtx_MEM (SImode,
29496 plus_constant (stack_pointer_rtx, 4));
29498 else
29500 regno = AX_REG;
29501 if (aggr)
29503 regno = DX_REG;
29504 if (nregs == 1)
29505 return gen_rtx_MEM (SImode,
29506 plus_constant (stack_pointer_rtx, 4));
29509 return gen_rtx_REG (SImode, regno);
29512 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
29515 /* Determine whether x86_output_mi_thunk can succeed. */
29517 static bool
29518 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
29519 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
29520 HOST_WIDE_INT vcall_offset, const_tree function)
29522 /* 64-bit can handle anything. */
29523 if (TARGET_64BIT)
29524 return true;
29526 /* For 32-bit, everything's fine if we have one free register. */
29527 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
29528 return true;
29530 /* Need a free register for vcall_offset. */
29531 if (vcall_offset)
29532 return false;
29534 /* Need a free register for GOT references. */
29535 if (flag_pic && !targetm.binds_local_p (function))
29536 return false;
29538 /* Otherwise ok. */
29539 return true;
29542 /* Output the assembler code for a thunk function. THUNK_DECL is the
29543 declaration for the thunk function itself, FUNCTION is the decl for
29544 the target function. DELTA is an immediate constant offset to be
29545 added to THIS. If VCALL_OFFSET is nonzero, the word at
29546 *(*this + vcall_offset) should be added to THIS. */
29548 static void
29549 x86_output_mi_thunk (FILE *file,
29550 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
29551 HOST_WIDE_INT vcall_offset, tree function)
29553 rtx xops[3];
29554 rtx this_param = x86_this_parameter (function);
29555 rtx this_reg, tmp;
29557 /* Make sure unwind info is emitted for the thunk if needed. */
29558 final_start_function (emit_barrier (), file, 1);
29560 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29561 pull it in now and let DELTA benefit. */
29562 if (REG_P (this_param))
29563 this_reg = this_param;
29564 else if (vcall_offset)
29566 /* Put the this parameter into %eax. */
29567 xops[0] = this_param;
29568 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
29569 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29571 else
29572 this_reg = NULL_RTX;
29574 /* Adjust the this parameter by a fixed constant. */
29575 if (delta)
29577 xops[0] = GEN_INT (delta);
29578 xops[1] = this_reg ? this_reg : this_param;
29579 if (TARGET_64BIT)
29581 if (!x86_64_general_operand (xops[0], DImode))
29583 tmp = gen_rtx_REG (DImode, R10_REG);
29584 xops[1] = tmp;
29585 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
29586 xops[0] = tmp;
29587 xops[1] = this_param;
29589 if (x86_maybe_negate_const_int (&xops[0], DImode))
29590 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
29591 else
29592 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
29594 else if (x86_maybe_negate_const_int (&xops[0], SImode))
29595 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
29596 else
29597 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
29600 /* Adjust the this parameter by a value stored in the vtable. */
29601 if (vcall_offset)
29603 if (TARGET_64BIT)
29604 tmp = gen_rtx_REG (DImode, R10_REG);
29605 else
29607 int tmp_regno = CX_REG;
29608 if (lookup_attribute ("fastcall",
29609 TYPE_ATTRIBUTES (TREE_TYPE (function)))
29610 || lookup_attribute ("thiscall",
29611 TYPE_ATTRIBUTES (TREE_TYPE (function))))
29612 tmp_regno = AX_REG;
29613 tmp = gen_rtx_REG (SImode, tmp_regno);
29616 xops[0] = gen_rtx_MEM (Pmode, this_reg);
29617 xops[1] = tmp;
29618 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29620 /* Adjust the this parameter. */
29621 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
29622 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
29624 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
29625 xops[0] = GEN_INT (vcall_offset);
29626 xops[1] = tmp2;
29627 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
29628 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
29630 xops[1] = this_reg;
29631 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
29634 /* If necessary, drop THIS back to its stack slot. */
29635 if (this_reg && this_reg != this_param)
29637 xops[0] = this_reg;
29638 xops[1] = this_param;
29639 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29642 xops[0] = XEXP (DECL_RTL (function), 0);
29643 if (TARGET_64BIT)
29645 if (!flag_pic || targetm.binds_local_p (function)
29646 || DEFAULT_ABI == MS_ABI)
29647 output_asm_insn ("jmp\t%P0", xops);
29648 /* All thunks should be in the same object as their target,
29649 and thus binds_local_p should be true. */
29650 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
29651 gcc_unreachable ();
29652 else
29654 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
29655 tmp = gen_rtx_CONST (Pmode, tmp);
29656 tmp = gen_rtx_MEM (QImode, tmp);
29657 xops[0] = tmp;
29658 output_asm_insn ("jmp\t%A0", xops);
29661 else
29663 if (!flag_pic || targetm.binds_local_p (function))
29664 output_asm_insn ("jmp\t%P0", xops);
29665 else
29666 #if TARGET_MACHO
29667 if (TARGET_MACHO)
29669 rtx sym_ref = XEXP (DECL_RTL (function), 0);
29670 if (TARGET_MACHO_BRANCH_ISLANDS)
29671 sym_ref = (gen_rtx_SYMBOL_REF
29672 (Pmode,
29673 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
29674 tmp = gen_rtx_MEM (QImode, sym_ref);
29675 xops[0] = tmp;
29676 output_asm_insn ("jmp\t%0", xops);
29678 else
29679 #endif /* TARGET_MACHO */
29681 tmp = gen_rtx_REG (SImode, CX_REG);
29682 output_set_got (tmp, NULL_RTX);
29684 xops[1] = tmp;
29685 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
29686 output_asm_insn ("jmp\t{*}%1", xops);
29689 final_end_function ();
29692 static void
29693 x86_file_start (void)
29695 default_file_start ();
29696 #if TARGET_MACHO
29697 darwin_file_start ();
29698 #endif
29699 if (X86_FILE_START_VERSION_DIRECTIVE)
29700 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
29701 if (X86_FILE_START_FLTUSED)
29702 fputs ("\t.global\t__fltused\n", asm_out_file);
29703 if (ix86_asm_dialect == ASM_INTEL)
29704 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
29708 x86_field_alignment (tree field, int computed)
29710 enum machine_mode mode;
29711 tree type = TREE_TYPE (field);
29713 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
29714 return computed;
29715 mode = TYPE_MODE (strip_array_types (type));
29716 if (mode == DFmode || mode == DCmode
29717 || GET_MODE_CLASS (mode) == MODE_INT
29718 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
29719 return MIN (32, computed);
29720 return computed;
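/* Illustrative example (editorial addition, not part of GCC): on ia32
   without -malign-double, x86_field_alignment caps DFmode (and similar)
   fields at 32-bit alignment, so the struct below is laid out with the
   double at offset 4 rather than 8.  */
struct example_packed_double { int i; double d; };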
29723 /* Output assembler code to FILE to increment profiler label # LABELNO
29724 for profiling a function entry. */
29725 void
29726 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
29728 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
29729 : MCOUNT_NAME);
29731 if (TARGET_64BIT)
29733 #ifndef NO_PROFILE_COUNTERS
29734 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
29735 #endif
29737 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
29738 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
29739 else
29740 fprintf (file, "\tcall\t%s\n", mcount_name);
29742 else if (flag_pic)
29744 #ifndef NO_PROFILE_COUNTERS
29745 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
29746 LPREFIX, labelno);
29747 #endif
29748 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
29750 else
29752 #ifndef NO_PROFILE_COUNTERS
29753 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
29754 LPREFIX, labelno);
29755 #endif
29756 fprintf (file, "\tcall\t%s\n", mcount_name);
29760 /* We don't have exact information about the insn sizes, but we may assume
29761 quite safely that we are informed about all 1 byte insns and memory
29762 address sizes. This is enough to eliminate unnecessary padding in
29763 99% of cases. */
29765 static int
29766 min_insn_size (rtx insn)
29768 int l = 0, len;
29770 if (!INSN_P (insn) || !active_insn_p (insn))
29771 return 0;
29773 /* Discard alignments we've emitted and jump instructions. */
29774 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
29775 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
29776 return 0;
29777 if (JUMP_TABLE_DATA_P (insn))
29778 return 0;
29780 /* Important case - calls are always 5 bytes.
29781 It is common to have many calls in a row. */
29782 if (CALL_P (insn)
29783 && symbolic_reference_mentioned_p (PATTERN (insn))
29784 && !SIBLING_CALL_P (insn))
29785 return 5;
29786 len = get_attr_length (insn);
29787 if (len <= 1)
29788 return 1;
29790 /* For normal instructions we rely on get_attr_length being exact,
29791 with a few exceptions. */
29792 if (!JUMP_P (insn))
29794 enum attr_type type = get_attr_type (insn);
29796 switch (type)
29798 case TYPE_MULTI:
29799 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
29800 || asm_noperands (PATTERN (insn)) >= 0)
29801 return 0;
29802 break;
29803 case TYPE_OTHER:
29804 case TYPE_FCMP:
29805 break;
29806 default:
29807 /* Otherwise trust get_attr_length. */
29808 return len;
29811 l = get_attr_length_address (insn);
29812 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
29813 l = 4;
29815 if (l)
29816 return 1+l;
29817 else
29818 return 2;
29821 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29823 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
29824 window. */
29826 static void
29827 ix86_avoid_jump_mispredicts (void)
29829 rtx insn, start = get_insns ();
29830 int nbytes = 0, njumps = 0;
29831 int isjump = 0;
29833 /* Look for all minimal intervals of instructions containing 4 jumps.
29834 The intervals are bounded by START and INSN. NBYTES is the total
29835 size of instructions in the interval including INSN and not including
29836 START. When NBYTES is smaller than 16 bytes, it is possible
29837 that the ends of START and INSN land in the same 16 byte page.
29839 The smallest offset in the page at which INSN can start is the case where
29840 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
29841 We add a p2align to the 16 byte window with maxskip 15 - NBYTES + sizeof (INSN). */
29843 for (insn = start; insn; insn = NEXT_INSN (insn))
29845 int min_size;
29847 if (LABEL_P (insn))
29849 int align = label_to_alignment (insn);
29850 int max_skip = label_to_max_skip (insn);
29852 if (max_skip > 15)
29853 max_skip = 15;
29854 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
29855 already in the current 16 byte page, because otherwise
29856 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
29857 bytes to reach 16 byte boundary. */
29858 if (align <= 0
29859 || (align <= 3 && max_skip != (1 << align) - 1))
29860 max_skip = 0;
29861 if (dump_file)
29862 fprintf (dump_file, "Label %i with max_skip %i\n",
29863 INSN_UID (insn), max_skip);
29864 if (max_skip)
29866 while (nbytes + max_skip >= 16)
29868 start = NEXT_INSN (start);
29869 if ((JUMP_P (start)
29870 && GET_CODE (PATTERN (start)) != ADDR_VEC
29871 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29872 || CALL_P (start))
29873 njumps--, isjump = 1;
29874 else
29875 isjump = 0;
29876 nbytes -= min_insn_size (start);
29879 continue;
29882 min_size = min_insn_size (insn);
29883 nbytes += min_size;
29884 if (dump_file)
29885 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
29886 INSN_UID (insn), min_size);
29887 if ((JUMP_P (insn)
29888 && GET_CODE (PATTERN (insn)) != ADDR_VEC
29889 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
29890 || CALL_P (insn))
29891 njumps++;
29892 else
29893 continue;
29895 while (njumps > 3)
29897 start = NEXT_INSN (start);
29898 if ((JUMP_P (start)
29899 && GET_CODE (PATTERN (start)) != ADDR_VEC
29900 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29901 || CALL_P (start))
29902 njumps--, isjump = 1;
29903 else
29904 isjump = 0;
29905 nbytes -= min_insn_size (start);
29907 gcc_assert (njumps >= 0);
29908 if (dump_file)
29909 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
29910 INSN_UID (start), INSN_UID (insn), nbytes);
29912 if (njumps == 3 && isjump && nbytes < 16)
29914 int padsize = 15 - nbytes + min_insn_size (insn);
29916 if (dump_file)
29917 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
29918 INSN_UID (insn), padsize);
29919 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
29923 #endif
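/* Illustrative sketch (editorial addition, not part of GCC): a simplified
   standalone version of the check above.  Given hypothetical per-insn
   sizes and jump flags, it reports whether instruction IDX would become
   the fourth jump decoded within one 16-byte window, which is the
   situation the pass pads against.  */
static int
example_fourth_jump_in_window (const int size[], const int is_jump[],
			       int idx)
{
  int bytes = 0, jumps = 0, i;

  if (!is_jump[idx])
    return 0;
  for (i = idx; i >= 0; i--)
    {
      bytes += size[i];
      if (bytes >= 16)		/* insns i..idx no longer fit in 16 bytes */
	break;
      if (is_jump[i])
	jumps++;
    }
  return jumps >= 4;
}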
29925 /* AMD Athlon works faster
29926 when RET is not the destination of a conditional jump or directly preceded
29927 by another jump instruction. We avoid the penalty by inserting a NOP just
29928 before such RET instructions. */
29929 static void
29930 ix86_pad_returns (void)
29932 edge e;
29933 edge_iterator ei;
29935 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29937 basic_block bb = e->src;
29938 rtx ret = BB_END (bb);
29939 rtx prev;
29940 bool replace = false;
29942 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
29943 || optimize_bb_for_size_p (bb))
29944 continue;
29945 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
29946 if (active_insn_p (prev) || LABEL_P (prev))
29947 break;
29948 if (prev && LABEL_P (prev))
29950 edge e;
29951 edge_iterator ei;
29953 FOR_EACH_EDGE (e, ei, bb->preds)
29954 if (EDGE_FREQUENCY (e) && e->src->index >= 0
29955 && !(e->flags & EDGE_FALLTHRU))
29956 replace = true;
29958 if (!replace)
29960 prev = prev_active_insn (ret);
29961 if (prev
29962 && ((JUMP_P (prev) && any_condjump_p (prev))
29963 || CALL_P (prev)))
29964 replace = true;
29965 /* Empty functions get a branch mispredict even when
29966 the jump destination is not visible to us. */
29967 if (!prev && !optimize_function_for_size_p (cfun))
29968 replace = true;
29970 if (replace)
29972 emit_jump_insn_before (gen_return_internal_long (), ret);
29973 delete_insn (ret);
29978 /* Count the minimum number of instructions in BB. Return 4 if the
29979 number of instructions >= 4. */
29981 static int
29982 ix86_count_insn_bb (basic_block bb)
29984 rtx insn;
29985 int insn_count = 0;
29987 /* Count number of instructions in this block. Return 4 if the number
29988 of instructions >= 4. */
29989 FOR_BB_INSNS (bb, insn)
29991 /* This can only happen in exit blocks. */
29992 if (JUMP_P (insn)
29993 && GET_CODE (PATTERN (insn)) == RETURN)
29994 break;
29996 if (NONDEBUG_INSN_P (insn)
29997 && GET_CODE (PATTERN (insn)) != USE
29998 && GET_CODE (PATTERN (insn)) != CLOBBER)
30000 insn_count++;
30001 if (insn_count >= 4)
30002 return insn_count;
30006 return insn_count;
30010 /* Count the minimum number of instructions in code path in BB.
30011 Return 4 if the number of instructions >= 4. */
30013 static int
30014 ix86_count_insn (basic_block bb)
30016 edge e;
30017 edge_iterator ei;
30018 int min_prev_count;
30020 /* Only bother counting instructions along paths with no
30021 more than 2 basic blocks between entry and exit. Given
30022 that BB has an edge to exit, determine if a predecessor
30023 of BB has an edge from entry. If so, compute the number
30024 of instructions in the predecessor block. If there
30025 happen to be multiple such blocks, compute the minimum. */
30026 min_prev_count = 4;
30027 FOR_EACH_EDGE (e, ei, bb->preds)
30029 edge prev_e;
30030 edge_iterator prev_ei;
30032 if (e->src == ENTRY_BLOCK_PTR)
30034 min_prev_count = 0;
30035 break;
30037 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
30039 if (prev_e->src == ENTRY_BLOCK_PTR)
30041 int count = ix86_count_insn_bb (e->src);
30042 if (count < min_prev_count)
30043 min_prev_count = count;
30044 break;
30049 if (min_prev_count < 4)
30050 min_prev_count += ix86_count_insn_bb (bb);
30052 return min_prev_count;
30055 /* Pad short function to 4 instructions. */
30057 static void
30058 ix86_pad_short_function (void)
30060 edge e;
30061 edge_iterator ei;
30063 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30065 rtx ret = BB_END (e->src);
30066 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
30068 int insn_count = ix86_count_insn (e->src);
30070 /* Pad short function. */
30071 if (insn_count < 4)
30073 rtx insn = ret;
30075 /* Find epilogue. */
30076 while (insn
30077 && (!NOTE_P (insn)
30078 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
30079 insn = PREV_INSN (insn);
30081 if (!insn)
30082 insn = ret;
30084 /* Two NOPs count as one instruction. */
30085 insn_count = 2 * (4 - insn_count);
30086 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
30092 /* Implement machine specific optimizations. We implement padding of returns
30093 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
30094 static void
30095 ix86_reorg (void)
30097 /* We are freeing block_for_insn in the toplev to keep compatibility
30098 with old MDEP_REORGS that are not CFG based. Recompute it now. */
30099 compute_bb_for_insn ();
30101 if (optimize && optimize_function_for_speed_p (cfun))
30103 if (TARGET_PAD_SHORT_FUNCTION)
30104 ix86_pad_short_function ();
30105 else if (TARGET_PAD_RETURNS)
30106 ix86_pad_returns ();
30107 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30108 if (TARGET_FOUR_JUMP_LIMIT)
30109 ix86_avoid_jump_mispredicts ();
30110 #endif
30113 /* Run the vzeroupper optimization if needed. */
30114 if (TARGET_VZEROUPPER)
30115 move_or_delete_vzeroupper ();
30118 /* Return nonzero when a QImode register that must be represented via a REX prefix
30119 is used. */
30120 bool
30121 x86_extended_QIreg_mentioned_p (rtx insn)
30123 int i;
30124 extract_insn_cached (insn);
30125 for (i = 0; i < recog_data.n_operands; i++)
30126 if (REG_P (recog_data.operand[i])
30127 && REGNO (recog_data.operand[i]) > BX_REG)
30128 return true;
30129 return false;
30132 /* Return nonzero when P points to a register encoded via a REX prefix.
30133 Called via for_each_rtx. */
30134 static int
30135 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
30137 unsigned int regno;
30138 if (!REG_P (*p))
30139 return 0;
30140 regno = REGNO (*p);
30141 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
30144 /* Return true when INSN mentions register that must be encoded using REX
30145 prefix. */
30146 bool
30147 x86_extended_reg_mentioned_p (rtx insn)
30149 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
30150 extended_reg_mentioned_1, NULL);
30153 /* If profitable, negate (without causing overflow) integer constant
30154 of mode MODE at location LOC. Return true in this case. */
30155 bool
30156 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
30158 HOST_WIDE_INT val;
30160 if (!CONST_INT_P (*loc))
30161 return false;
30163 switch (mode)
30165 case DImode:
30166 /* DImode x86_64 constants must fit in 32 bits. */
30167 gcc_assert (x86_64_immediate_operand (*loc, mode));
30169 mode = SImode;
30170 break;
30172 case SImode:
30173 case HImode:
30174 case QImode:
30175 break;
30177 default:
30178 gcc_unreachable ();
30181 /* Avoid overflows. */
30182 if (mode_signbit_p (mode, *loc))
30183 return false;
30185 val = INTVAL (*loc);
30187 /* Make things pretty: prefer `subl $4,%eax' to `addl $-4,%eax'.
30188 Exception: -128 encodes smaller than 128, so swap the sign and the op. */
30189 if ((val < 0 && val != -128)
30190 || val == 128)
30192 *loc = GEN_INT (-val);
30193 return true;
30196 return false;
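/* Illustrative note (not from the GCC sources): the sign-extended imm8 form
   covers -128..127, so for example

     addl $-4, %eax    ->  subl $4, %eax      (cosmetic, per the comment above)
     addl $128, %eax   ->  subl $-128, %eax   (imm32 shrinks to an imm8)

   while a constant of -128 itself is left alone, since negating it to +128
   would force the longer imm32 encoding.  */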
30199 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
30200 optabs would emit if we didn't have TFmode patterns. */
30202 void
30203 x86_emit_floatuns (rtx operands[2])
30205 rtx neglab, donelab, i0, i1, f0, in, out;
30206 enum machine_mode mode, inmode;
30208 inmode = GET_MODE (operands[1]);
30209 gcc_assert (inmode == SImode || inmode == DImode);
30211 out = operands[0];
30212 in = force_reg (inmode, operands[1]);
30213 mode = GET_MODE (out);
30214 neglab = gen_label_rtx ();
30215 donelab = gen_label_rtx ();
30216 f0 = gen_reg_rtx (mode);
30218 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
30220 expand_float (out, in, 0);
30222 emit_jump_insn (gen_jump (donelab));
30223 emit_barrier ();
30225 emit_label (neglab);
30227 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
30228 1, OPTAB_DIRECT);
30229 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
30230 1, OPTAB_DIRECT);
30231 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
30233 expand_float (f0, i0, 0);
30235 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
30237 emit_label (donelab);
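/* Illustrative C equivalent of the sequence emitted above (a sketch, not
   part of the GCC sources; the DImode-to-DFmode case is shown).  When the
   input looks negative as a signed number, it is halved with the lost low
   bit folded back in (round-to-odd), converted with the signed
   instruction, and the result is doubled.  */
static double
floatuns_sketch (unsigned long long in)
{
  if ((long long) in >= 0)
    return (double) (long long) in;                /* fits the signed path */
  unsigned long long half = (in >> 1) | (in & 1);  /* round-to-odd halving */
  return 2.0 * (double) (long long) half;          /* f0 + f0 */
}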
30240 /* AVX does not support 32-byte integer vector operations,
30241 thus the longest vector we are faced with is V16QImode. */
30242 #define MAX_VECT_LEN 16
30244 struct expand_vec_perm_d
30246 rtx target, op0, op1;
30247 unsigned char perm[MAX_VECT_LEN];
30248 enum machine_mode vmode;
30249 unsigned char nelt;
30250 bool testing_p;
30253 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
30254 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
30256 /* Get a vector mode of the same size as the original but with elements
30257 twice as wide. This is only guaranteed to apply to integral vectors. */
30259 static inline enum machine_mode
30260 get_mode_wider_vector (enum machine_mode o)
30262 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
30263 enum machine_mode n = GET_MODE_WIDER_MODE (o);
30264 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
30265 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
30266 return n;
30269 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30270 with all elements equal to VAR. Return true if successful. */
30272 static bool
30273 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
30274 rtx target, rtx val)
30276 bool ok;
30278 switch (mode)
30280 case V2SImode:
30281 case V2SFmode:
30282 if (!mmx_ok)
30283 return false;
30284 /* FALLTHRU */
30286 case V4DFmode:
30287 case V4DImode:
30288 case V8SFmode:
30289 case V8SImode:
30290 case V2DFmode:
30291 case V2DImode:
30292 case V4SFmode:
30293 case V4SImode:
30295 rtx insn, dup;
30297 /* First attempt to recognize VAL as-is. */
30298 dup = gen_rtx_VEC_DUPLICATE (mode, val);
30299 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
30300 if (recog_memoized (insn) < 0)
30302 rtx seq;
30303 /* If that fails, force VAL into a register. */
30305 start_sequence ();
30306 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
30307 seq = get_insns ();
30308 end_sequence ();
30309 if (seq)
30310 emit_insn_before (seq, insn);
30312 ok = recog_memoized (insn) >= 0;
30313 gcc_assert (ok);
30316 return true;
30318 case V4HImode:
30319 if (!mmx_ok)
30320 return false;
30321 if (TARGET_SSE || TARGET_3DNOW_A)
30323 rtx x;
30325 val = gen_lowpart (SImode, val);
30326 x = gen_rtx_TRUNCATE (HImode, val);
30327 x = gen_rtx_VEC_DUPLICATE (mode, x);
30328 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30329 return true;
30331 goto widen;
30333 case V8QImode:
30334 if (!mmx_ok)
30335 return false;
30336 goto widen;
30338 case V8HImode:
30339 if (TARGET_SSE2)
30341 struct expand_vec_perm_d dperm;
30342 rtx tmp1, tmp2;
30344 permute:
30345 memset (&dperm, 0, sizeof (dperm));
30346 dperm.target = target;
30347 dperm.vmode = mode;
30348 dperm.nelt = GET_MODE_NUNITS (mode);
30349 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
30351 /* Extend to SImode using a paradoxical SUBREG. */
30352 tmp1 = gen_reg_rtx (SImode);
30353 emit_move_insn (tmp1, gen_lowpart (SImode, val));
30355 /* Insert the SImode value as low element of a V4SImode vector. */
30356 tmp2 = gen_lowpart (V4SImode, dperm.op0);
30357 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
30359 ok = (expand_vec_perm_1 (&dperm)
30360 || expand_vec_perm_broadcast_1 (&dperm));
30361 gcc_assert (ok);
30362 return ok;
30364 goto widen;
30366 case V16QImode:
30367 if (TARGET_SSE2)
30368 goto permute;
30369 goto widen;
30371 widen:
30372 /* Replicate the value once into the next wider mode and recurse. */
30374 enum machine_mode smode, wsmode, wvmode;
30375 rtx x;
30377 smode = GET_MODE_INNER (mode);
30378 wvmode = get_mode_wider_vector (mode);
30379 wsmode = GET_MODE_INNER (wvmode);
30381 val = convert_modes (wsmode, smode, val, true);
30382 x = expand_simple_binop (wsmode, ASHIFT, val,
30383 GEN_INT (GET_MODE_BITSIZE (smode)),
30384 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30385 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
30387 x = gen_lowpart (wvmode, target);
30388 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
30389 gcc_assert (ok);
30390 return ok;
30393 case V16HImode:
30394 case V32QImode:
30396 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
30397 rtx x = gen_reg_rtx (hvmode);
30399 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
30400 gcc_assert (ok);
30402 x = gen_rtx_VEC_CONCAT (mode, x, x);
30403 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30405 return true;
30407 default:
30408 return false;
30412 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30413 whose ONE_VAR element is VAR, and other elements are zero. Return true
30414 if successful. */
30416 static bool
30417 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
30418 rtx target, rtx var, int one_var)
30420 enum machine_mode vsimode;
30421 rtx new_target;
30422 rtx x, tmp;
30423 bool use_vector_set = false;
30425 switch (mode)
30427 case V2DImode:
30428 /* For SSE4.1, we normally use vector set. But if the second
30429 element is zero and inter-unit moves are OK, we use movq
30430 instead. */
30431 use_vector_set = (TARGET_64BIT
30432 && TARGET_SSE4_1
30433 && !(TARGET_INTER_UNIT_MOVES
30434 && one_var == 0));
30435 break;
30436 case V16QImode:
30437 case V4SImode:
30438 case V4SFmode:
30439 use_vector_set = TARGET_SSE4_1;
30440 break;
30441 case V8HImode:
30442 use_vector_set = TARGET_SSE2;
30443 break;
30444 case V4HImode:
30445 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
30446 break;
30447 case V32QImode:
30448 case V16HImode:
30449 case V8SImode:
30450 case V8SFmode:
30451 case V4DFmode:
30452 use_vector_set = TARGET_AVX;
30453 break;
30454 case V4DImode:
30455 /* Use ix86_expand_vector_set in 64bit mode only. */
30456 use_vector_set = TARGET_AVX && TARGET_64BIT;
30457 break;
30458 default:
30459 break;
30462 if (use_vector_set)
30464 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
30465 var = force_reg (GET_MODE_INNER (mode), var);
30466 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30467 return true;
30470 switch (mode)
30472 case V2SFmode:
30473 case V2SImode:
30474 if (!mmx_ok)
30475 return false;
30476 /* FALLTHRU */
30478 case V2DFmode:
30479 case V2DImode:
30480 if (one_var != 0)
30481 return false;
30482 var = force_reg (GET_MODE_INNER (mode), var);
30483 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
30484 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30485 return true;
30487 case V4SFmode:
30488 case V4SImode:
30489 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
30490 new_target = gen_reg_rtx (mode);
30491 else
30492 new_target = target;
30493 var = force_reg (GET_MODE_INNER (mode), var);
30494 x = gen_rtx_VEC_DUPLICATE (mode, var);
30495 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
30496 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
30497 if (one_var != 0)
30499 /* We need to shuffle the value to the correct position, so
30500 create a new pseudo to store the intermediate result. */
30502 /* With SSE2, we can use the integer shuffle insns. */
30503 if (mode != V4SFmode && TARGET_SSE2)
30505 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
30506 const1_rtx,
30507 GEN_INT (one_var == 1 ? 0 : 1),
30508 GEN_INT (one_var == 2 ? 0 : 1),
30509 GEN_INT (one_var == 3 ? 0 : 1)));
30510 if (target != new_target)
30511 emit_move_insn (target, new_target);
30512 return true;
30515 /* Otherwise convert the intermediate result to V4SFmode and
30516 use the SSE1 shuffle instructions. */
30517 if (mode != V4SFmode)
30519 tmp = gen_reg_rtx (V4SFmode);
30520 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
30522 else
30523 tmp = new_target;
30525 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
30526 const1_rtx,
30527 GEN_INT (one_var == 1 ? 0 : 1),
30528 GEN_INT (one_var == 2 ? 0+4 : 1+4),
30529 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
30531 if (mode != V4SFmode)
30532 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
30533 else if (tmp != target)
30534 emit_move_insn (target, tmp);
30536 else if (target != new_target)
30537 emit_move_insn (target, new_target);
30538 return true;
30540 case V8HImode:
30541 case V16QImode:
30542 vsimode = V4SImode;
30543 goto widen;
30544 case V4HImode:
30545 case V8QImode:
30546 if (!mmx_ok)
30547 return false;
30548 vsimode = V2SImode;
30549 goto widen;
30550 widen:
30551 if (one_var != 0)
30552 return false;
30554 /* Zero extend the variable element to SImode and recurse. */
30555 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
30557 x = gen_reg_rtx (vsimode);
30558 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
30559 var, one_var))
30560 gcc_unreachable ();
30562 emit_move_insn (target, gen_lowpart (mode, x));
30563 return true;
30565 default:
30566 return false;
30570 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30571 consisting of the values in VALS. It is known that all elements
30572 except ONE_VAR are constants. Return true if successful. */
30574 static bool
30575 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
30576 rtx target, rtx vals, int one_var)
30578 rtx var = XVECEXP (vals, 0, one_var);
30579 enum machine_mode wmode;
30580 rtx const_vec, x;
30582 const_vec = copy_rtx (vals);
30583 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
30584 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
30586 switch (mode)
30588 case V2DFmode:
30589 case V2DImode:
30590 case V2SFmode:
30591 case V2SImode:
30592 /* For the two element vectors, it's just as easy to use
30593 the general case. */
30594 return false;
30596 case V4DImode:
30597 /* Use ix86_expand_vector_set in 64bit mode only. */
30598 if (!TARGET_64BIT)
30599 return false;
30600 case V4DFmode:
30601 case V8SFmode:
30602 case V8SImode:
30603 case V16HImode:
30604 case V32QImode:
30605 case V4SFmode:
30606 case V4SImode:
30607 case V8HImode:
30608 case V4HImode:
30609 break;
30611 case V16QImode:
30612 if (TARGET_SSE4_1)
30613 break;
30614 wmode = V8HImode;
30615 goto widen;
30616 case V8QImode:
30617 wmode = V4HImode;
30618 goto widen;
30619 widen:
30620 /* There's no way to set one QImode entry easily. Combine
30621 the variable value with its adjacent constant value, and
30622 promote to an HImode set. */
30623 x = XVECEXP (vals, 0, one_var ^ 1);
30624 if (one_var & 1)
30626 var = convert_modes (HImode, QImode, var, true);
30627 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
30628 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30629 x = GEN_INT (INTVAL (x) & 0xff);
30631 else
30633 var = convert_modes (HImode, QImode, var, true);
30634 x = gen_int_mode (INTVAL (x) << 8, HImode);
30636 if (x != const0_rtx)
30637 var = expand_simple_binop (HImode, IOR, var, x, var,
30638 1, OPTAB_LIB_WIDEN);
30640 x = gen_reg_rtx (wmode);
30641 emit_move_insn (x, gen_lowpart (wmode, const_vec));
30642 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
30644 emit_move_insn (target, gen_lowpart (mode, x));
30645 return true;
30647 default:
30648 return false;
30651 emit_move_insn (target, const_vec);
30652 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30653 return true;
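/* Illustrative C model of the QImode widening above (a sketch, not part of
   the GCC sources): there is no cheap insert of a single byte element, so
   the variable byte is merged with its neighbouring constant byte into one
   HImode value, which is then inserted at position one_var / 2.  */
static unsigned short
merge_qi_pair_sketch (unsigned char var, unsigned char adj_const, int one_var)
{
  if (one_var & 1)                                  /* variable byte is the high half */
    return ((unsigned short) var << 8) | adj_const;
  return ((unsigned short) adj_const << 8) | var;   /* variable byte is the low half */
}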
30656 /* A subroutine of ix86_expand_vector_init_general. Use vector
30657 concatenate to handle the most general case: all values variable,
30658 and none identical. */
30660 static void
30661 ix86_expand_vector_init_concat (enum machine_mode mode,
30662 rtx target, rtx *ops, int n)
30664 enum machine_mode cmode, hmode = VOIDmode;
30665 rtx first[8], second[4];
30666 rtvec v;
30667 int i, j;
30669 switch (n)
30671 case 2:
30672 switch (mode)
30674 case V8SImode:
30675 cmode = V4SImode;
30676 break;
30677 case V8SFmode:
30678 cmode = V4SFmode;
30679 break;
30680 case V4DImode:
30681 cmode = V2DImode;
30682 break;
30683 case V4DFmode:
30684 cmode = V2DFmode;
30685 break;
30686 case V4SImode:
30687 cmode = V2SImode;
30688 break;
30689 case V4SFmode:
30690 cmode = V2SFmode;
30691 break;
30692 case V2DImode:
30693 cmode = DImode;
30694 break;
30695 case V2SImode:
30696 cmode = SImode;
30697 break;
30698 case V2DFmode:
30699 cmode = DFmode;
30700 break;
30701 case V2SFmode:
30702 cmode = SFmode;
30703 break;
30704 default:
30705 gcc_unreachable ();
30708 if (!register_operand (ops[1], cmode))
30709 ops[1] = force_reg (cmode, ops[1]);
30710 if (!register_operand (ops[0], cmode))
30711 ops[0] = force_reg (cmode, ops[0]);
30712 emit_insn (gen_rtx_SET (VOIDmode, target,
30713 gen_rtx_VEC_CONCAT (mode, ops[0],
30714 ops[1])));
30715 break;
30717 case 4:
30718 switch (mode)
30720 case V4DImode:
30721 cmode = V2DImode;
30722 break;
30723 case V4DFmode:
30724 cmode = V2DFmode;
30725 break;
30726 case V4SImode:
30727 cmode = V2SImode;
30728 break;
30729 case V4SFmode:
30730 cmode = V2SFmode;
30731 break;
30732 default:
30733 gcc_unreachable ();
30735 goto half;
30737 case 8:
30738 switch (mode)
30740 case V8SImode:
30741 cmode = V2SImode;
30742 hmode = V4SImode;
30743 break;
30744 case V8SFmode:
30745 cmode = V2SFmode;
30746 hmode = V4SFmode;
30747 break;
30748 default:
30749 gcc_unreachable ();
30751 goto half;
30753 half:
30754 /* FIXME: We process inputs backward to help RA. PR 36222. */
30755 i = n - 1;
30756 j = (n >> 1) - 1;
30757 for (; i > 0; i -= 2, j--)
30759 first[j] = gen_reg_rtx (cmode);
30760 v = gen_rtvec (2, ops[i - 1], ops[i]);
30761 ix86_expand_vector_init (false, first[j],
30762 gen_rtx_PARALLEL (cmode, v));
30765 n >>= 1;
30766 if (n > 2)
30768 gcc_assert (hmode != VOIDmode);
30769 for (i = j = 0; i < n; i += 2, j++)
30771 second[j] = gen_reg_rtx (hmode);
30772 ix86_expand_vector_init_concat (hmode, second [j],
30773 &first [i], 2);
30775 n >>= 1;
30776 ix86_expand_vector_init_concat (mode, target, second, n);
30778 else
30779 ix86_expand_vector_init_concat (mode, target, first, n);
30780 break;
30782 default:
30783 gcc_unreachable ();
30787 /* A subroutine of ix86_expand_vector_init_general. Use vector
30788 interleave to handle the most general case: all values variable,
30789 and none identical. */
30791 static void
30792 ix86_expand_vector_init_interleave (enum machine_mode mode,
30793 rtx target, rtx *ops, int n)
30795 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
30796 int i, j;
30797 rtx op0, op1;
30798 rtx (*gen_load_even) (rtx, rtx, rtx);
30799 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
30800 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
30802 switch (mode)
30804 case V8HImode:
30805 gen_load_even = gen_vec_setv8hi;
30806 gen_interleave_first_low = gen_vec_interleave_lowv4si;
30807 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30808 inner_mode = HImode;
30809 first_imode = V4SImode;
30810 second_imode = V2DImode;
30811 third_imode = VOIDmode;
30812 break;
30813 case V16QImode:
30814 gen_load_even = gen_vec_setv16qi;
30815 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
30816 gen_interleave_second_low = gen_vec_interleave_lowv4si;
30817 inner_mode = QImode;
30818 first_imode = V8HImode;
30819 second_imode = V4SImode;
30820 third_imode = V2DImode;
30821 break;
30822 default:
30823 gcc_unreachable ();
30826 for (i = 0; i < n; i++)
30828 /* Extend the odd element to SImode using a paradoxical SUBREG. */
30829 op0 = gen_reg_rtx (SImode);
30830 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
30832 /* Insert the SImode value as low element of V4SImode vector. */
30833 op1 = gen_reg_rtx (V4SImode);
30834 op0 = gen_rtx_VEC_MERGE (V4SImode,
30835 gen_rtx_VEC_DUPLICATE (V4SImode,
30836 op0),
30837 CONST0_RTX (V4SImode),
30838 const1_rtx);
30839 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
30841 /* Cast the V4SImode vector back to a vector in the original mode. */
30842 op0 = gen_reg_rtx (mode);
30843 emit_move_insn (op0, gen_lowpart (mode, op1));
30845 /* Load even elements into the second position. */
30846 emit_insn (gen_load_even (op0,
30847 force_reg (inner_mode,
30848 ops [i + i + 1]),
30849 const1_rtx));
30851 /* Cast vector to FIRST_IMODE vector. */
30852 ops[i] = gen_reg_rtx (first_imode);
30853 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
30856 /* Interleave low FIRST_IMODE vectors. */
30857 for (i = j = 0; i < n; i += 2, j++)
30859 op0 = gen_reg_rtx (first_imode);
30860 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
30862 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
30863 ops[j] = gen_reg_rtx (second_imode);
30864 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
30867 /* Interleave low SECOND_IMODE vectors. */
30868 switch (second_imode)
30870 case V4SImode:
30871 for (i = j = 0; i < n / 2; i += 2, j++)
30873 op0 = gen_reg_rtx (second_imode);
30874 emit_insn (gen_interleave_second_low (op0, ops[i],
30875 ops[i + 1]));
30877 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
30878 vector. */
30879 ops[j] = gen_reg_rtx (third_imode);
30880 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
30882 second_imode = V2DImode;
30883 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30884 /* FALLTHRU */
30886 case V2DImode:
30887 op0 = gen_reg_rtx (second_imode);
30888 emit_insn (gen_interleave_second_low (op0, ops[0],
30889 ops[1]));
30891 /* Cast the SECOND_IMODE vector back to a vector in the original
30892 mode. */
30893 emit_insn (gen_rtx_SET (VOIDmode, target,
30894 gen_lowpart (mode, op0)));
30895 break;
30897 default:
30898 gcc_unreachable ();
30902 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
30903 all values variable, and none identical. */
30905 static void
30906 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
30907 rtx target, rtx vals)
30909 rtx ops[32], op0, op1;
30910 enum machine_mode half_mode = VOIDmode;
30911 int n, i;
30913 switch (mode)
30915 case V2SFmode:
30916 case V2SImode:
30917 if (!mmx_ok && !TARGET_SSE)
30918 break;
30919 /* FALLTHRU */
30921 case V8SFmode:
30922 case V8SImode:
30923 case V4DFmode:
30924 case V4DImode:
30925 case V4SFmode:
30926 case V4SImode:
30927 case V2DFmode:
30928 case V2DImode:
30929 n = GET_MODE_NUNITS (mode);
30930 for (i = 0; i < n; i++)
30931 ops[i] = XVECEXP (vals, 0, i);
30932 ix86_expand_vector_init_concat (mode, target, ops, n);
30933 return;
30935 case V32QImode:
30936 half_mode = V16QImode;
30937 goto half;
30939 case V16HImode:
30940 half_mode = V8HImode;
30941 goto half;
30943 half:
30944 n = GET_MODE_NUNITS (mode);
30945 for (i = 0; i < n; i++)
30946 ops[i] = XVECEXP (vals, 0, i);
30947 op0 = gen_reg_rtx (half_mode);
30948 op1 = gen_reg_rtx (half_mode);
30949 ix86_expand_vector_init_interleave (half_mode, op0, ops,
30950 n >> 2);
30951 ix86_expand_vector_init_interleave (half_mode, op1,
30952 &ops [n >> 1], n >> 2);
30953 emit_insn (gen_rtx_SET (VOIDmode, target,
30954 gen_rtx_VEC_CONCAT (mode, op0, op1)));
30955 return;
30957 case V16QImode:
30958 if (!TARGET_SSE4_1)
30959 break;
30960 /* FALLTHRU */
30962 case V8HImode:
30963 if (!TARGET_SSE2)
30964 break;
30966 /* Don't use ix86_expand_vector_init_interleave if we can't
30967 move from GPR to SSE register directly. */
30968 if (!TARGET_INTER_UNIT_MOVES)
30969 break;
30971 n = GET_MODE_NUNITS (mode);
30972 for (i = 0; i < n; i++)
30973 ops[i] = XVECEXP (vals, 0, i);
30974 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
30975 return;
30977 case V4HImode:
30978 case V8QImode:
30979 break;
30981 default:
30982 gcc_unreachable ();
30986 int i, j, n_elts, n_words, n_elt_per_word;
30987 enum machine_mode inner_mode;
30988 rtx words[4], shift;
30990 inner_mode = GET_MODE_INNER (mode);
30991 n_elts = GET_MODE_NUNITS (mode);
30992 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
30993 n_elt_per_word = n_elts / n_words;
30994 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
30996 for (i = 0; i < n_words; ++i)
30998 rtx word = NULL_RTX;
31000 for (j = 0; j < n_elt_per_word; ++j)
31002 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
31003 elt = convert_modes (word_mode, inner_mode, elt, true);
31005 if (j == 0)
31006 word = elt;
31007 else
31009 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
31010 word, 1, OPTAB_LIB_WIDEN);
31011 word = expand_simple_binop (word_mode, IOR, word, elt,
31012 word, 1, OPTAB_LIB_WIDEN);
31016 words[i] = word;
31019 if (n_words == 1)
31020 emit_move_insn (target, gen_lowpart (mode, words[0]));
31021 else if (n_words == 2)
31023 rtx tmp = gen_reg_rtx (mode);
31024 emit_clobber (tmp);
31025 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
31026 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
31027 emit_move_insn (target, tmp);
31029 else if (n_words == 4)
31031 rtx tmp = gen_reg_rtx (V4SImode);
31032 gcc_assert (word_mode == SImode);
31033 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
31034 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
31035 emit_move_insn (target, gen_lowpart (mode, tmp));
31037 else
31038 gcc_unreachable ();
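/* Illustrative C model of the scalar fallback above (a sketch, not part of
   the GCC sources): for the MMX-sized modes the elements are packed into
   word_mode integers, highest-numbered element first, by shifting and
   ORing, and the resulting words are then moved into the vector register.
   For V4HImode on a 32-bit target (two HImode elements per SImode word):  */
static void
pack_v4hi_sketch (unsigned short e0, unsigned short e1,
                  unsigned short e2, unsigned short e3,
                  unsigned int words[2])
{
  words[0] = ((unsigned int) e1 << 16) | e0;   /* elements 1 and 0 */
  words[1] = ((unsigned int) e3 << 16) | e2;   /* elements 3 and 2 */
}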
31042 /* Initialize vector TARGET via VALS. Suppress the use of MMX
31043 instructions unless MMX_OK is true. */
31045 void
31046 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
31048 enum machine_mode mode = GET_MODE (target);
31049 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31050 int n_elts = GET_MODE_NUNITS (mode);
31051 int n_var = 0, one_var = -1;
31052 bool all_same = true, all_const_zero = true;
31053 int i;
31054 rtx x;
31056 for (i = 0; i < n_elts; ++i)
31058 x = XVECEXP (vals, 0, i);
31059 if (!(CONST_INT_P (x)
31060 || GET_CODE (x) == CONST_DOUBLE
31061 || GET_CODE (x) == CONST_FIXED))
31062 n_var++, one_var = i;
31063 else if (x != CONST0_RTX (inner_mode))
31064 all_const_zero = false;
31065 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
31066 all_same = false;
31069 /* Constants are best loaded from the constant pool. */
31070 if (n_var == 0)
31072 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
31073 return;
31076 /* If all values are identical, broadcast the value. */
31077 if (all_same
31078 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
31079 XVECEXP (vals, 0, 0)))
31080 return;
31082 /* Values where only one field is non-constant are best loaded from
31083 the pool and overwritten via move later. */
31084 if (n_var == 1)
31086 if (all_const_zero
31087 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
31088 XVECEXP (vals, 0, one_var),
31089 one_var))
31090 return;
31092 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
31093 return;
31096 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
31099 void
31100 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
31102 enum machine_mode mode = GET_MODE (target);
31103 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31104 enum machine_mode half_mode;
31105 bool use_vec_merge = false;
31106 rtx tmp;
31107 static rtx (*gen_extract[6][2]) (rtx, rtx)
31109 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
31110 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
31111 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
31112 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
31113 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
31114 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
31116 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
31118 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
31119 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
31120 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
31121 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
31122 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
31123 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
31125 int i, j, n;
31127 switch (mode)
31129 case V2SFmode:
31130 case V2SImode:
31131 if (mmx_ok)
31133 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31134 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
31135 if (elt == 0)
31136 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31137 else
31138 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31139 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31140 return;
31142 break;
31144 case V2DImode:
31145 use_vec_merge = TARGET_SSE4_1;
31146 if (use_vec_merge)
31147 break;
31149 case V2DFmode:
31151 rtx op0, op1;
31153 /* For the two element vectors, we implement a VEC_CONCAT with
31154 the extraction of the other element. */
31156 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
31157 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
31159 if (elt == 0)
31160 op0 = val, op1 = tmp;
31161 else
31162 op0 = tmp, op1 = val;
31164 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
31165 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31167 return;
31169 case V4SFmode:
31170 use_vec_merge = TARGET_SSE4_1;
31171 if (use_vec_merge)
31172 break;
31174 switch (elt)
31176 case 0:
31177 use_vec_merge = true;
31178 break;
31180 case 1:
31181 /* tmp = target = A B C D */
31182 tmp = copy_to_reg (target);
31183 /* target = A A B B */
31184 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
31185 /* target = X A B B */
31186 ix86_expand_vector_set (false, target, val, 0);
31187 /* target = A X C D */
31188 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31189 const1_rtx, const0_rtx,
31190 GEN_INT (2+4), GEN_INT (3+4)));
31191 return;
31193 case 2:
31194 /* tmp = target = A B C D */
31195 tmp = copy_to_reg (target);
31196 /* tmp = X B C D */
31197 ix86_expand_vector_set (false, tmp, val, 0);
31198 /* target = A B X D */
31199 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31200 const0_rtx, const1_rtx,
31201 GEN_INT (0+4), GEN_INT (3+4)));
31202 return;
31204 case 3:
31205 /* tmp = target = A B C D */
31206 tmp = copy_to_reg (target);
31207 /* tmp = X B C D */
31208 ix86_expand_vector_set (false, tmp, val, 0);
31209 /* target = A B C X */
31210 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31211 const0_rtx, const1_rtx,
31212 GEN_INT (2+4), GEN_INT (0+4)));
31213 return;
31215 default:
31216 gcc_unreachable ();
31218 break;
31220 case V4SImode:
31221 use_vec_merge = TARGET_SSE4_1;
31222 if (use_vec_merge)
31223 break;
31225 /* Element 0 handled by vec_merge below. */
31226 if (elt == 0)
31228 use_vec_merge = true;
31229 break;
31232 if (TARGET_SSE2)
31234 /* With SSE2, use integer shuffles to swap element 0 and ELT,
31235 store into element 0, then shuffle them back. */
31237 rtx order[4];
31239 order[0] = GEN_INT (elt);
31240 order[1] = const1_rtx;
31241 order[2] = const2_rtx;
31242 order[3] = GEN_INT (3);
31243 order[elt] = const0_rtx;
31245 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31246 order[1], order[2], order[3]));
31248 ix86_expand_vector_set (false, target, val, 0);
31250 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31251 order[1], order[2], order[3]));
31253 else
31255 /* For SSE1, we have to reuse the V4SF code. */
31256 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
31257 gen_lowpart (SFmode, val), elt);
31259 return;
31261 case V8HImode:
31262 use_vec_merge = TARGET_SSE2;
31263 break;
31264 case V4HImode:
31265 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31266 break;
31268 case V16QImode:
31269 use_vec_merge = TARGET_SSE4_1;
31270 break;
31272 case V8QImode:
31273 break;
31275 case V32QImode:
31276 half_mode = V16QImode;
31277 j = 0;
31278 n = 16;
31279 goto half;
31281 case V16HImode:
31282 half_mode = V8HImode;
31283 j = 1;
31284 n = 8;
31285 goto half;
31287 case V8SImode:
31288 half_mode = V4SImode;
31289 j = 2;
31290 n = 4;
31291 goto half;
31293 case V4DImode:
31294 half_mode = V2DImode;
31295 j = 3;
31296 n = 2;
31297 goto half;
31299 case V8SFmode:
31300 half_mode = V4SFmode;
31301 j = 4;
31302 n = 4;
31303 goto half;
31305 case V4DFmode:
31306 half_mode = V2DFmode;
31307 j = 5;
31308 n = 2;
31309 goto half;
31311 half:
31312 /* Compute offset. */
31313 i = elt / n;
31314 elt %= n;
31316 gcc_assert (i <= 1);
31318 /* Extract the half. */
31319 tmp = gen_reg_rtx (half_mode);
31320 emit_insn (gen_extract[j][i] (tmp, target));
31322 /* Put val in tmp at elt. */
31323 ix86_expand_vector_set (false, tmp, val, elt);
31325 /* Put it back. */
31326 emit_insn (gen_insert[j][i] (target, target, tmp));
31327 return;
31329 default:
31330 break;
31333 if (use_vec_merge)
31335 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
31336 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
31337 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31339 else
31341 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31343 emit_move_insn (mem, target);
31345 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31346 emit_move_insn (tmp, val);
31348 emit_move_insn (target, mem);
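/* Illustrative C model of the memory fallback above (a sketch, not part of
   the GCC sources): when no suitable insert instruction exists, the vector
   is spilled to a stack slot, the element is overwritten there, and the
   whole vector is reloaded.  Shown for a four-element float vector.  */
static void
vec_set_via_memory_sketch (float vec[4], float val, int elt)
{
  float mem[4];
  __builtin_memcpy (mem, vec, sizeof mem);   /* emit_move_insn (mem, target) */
  mem[elt] = val;                            /* store VAL at element ELT */
  __builtin_memcpy (vec, mem, sizeof mem);   /* emit_move_insn (target, mem) */
}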
31352 void
31353 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
31355 enum machine_mode mode = GET_MODE (vec);
31356 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31357 bool use_vec_extr = false;
31358 rtx tmp;
31360 switch (mode)
31362 case V2SImode:
31363 case V2SFmode:
31364 if (!mmx_ok)
31365 break;
31366 /* FALLTHRU */
31368 case V2DFmode:
31369 case V2DImode:
31370 use_vec_extr = true;
31371 break;
31373 case V4SFmode:
31374 use_vec_extr = TARGET_SSE4_1;
31375 if (use_vec_extr)
31376 break;
31378 switch (elt)
31380 case 0:
31381 tmp = vec;
31382 break;
31384 case 1:
31385 case 3:
31386 tmp = gen_reg_rtx (mode);
31387 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
31388 GEN_INT (elt), GEN_INT (elt),
31389 GEN_INT (elt+4), GEN_INT (elt+4)));
31390 break;
31392 case 2:
31393 tmp = gen_reg_rtx (mode);
31394 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
31395 break;
31397 default:
31398 gcc_unreachable ();
31400 vec = tmp;
31401 use_vec_extr = true;
31402 elt = 0;
31403 break;
31405 case V4SImode:
31406 use_vec_extr = TARGET_SSE4_1;
31407 if (use_vec_extr)
31408 break;
31410 if (TARGET_SSE2)
31412 switch (elt)
31414 case 0:
31415 tmp = vec;
31416 break;
31418 case 1:
31419 case 3:
31420 tmp = gen_reg_rtx (mode);
31421 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
31422 GEN_INT (elt), GEN_INT (elt),
31423 GEN_INT (elt), GEN_INT (elt)));
31424 break;
31426 case 2:
31427 tmp = gen_reg_rtx (mode);
31428 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
31429 break;
31431 default:
31432 gcc_unreachable ();
31434 vec = tmp;
31435 use_vec_extr = true;
31436 elt = 0;
31438 else
31440 /* For SSE1, we have to reuse the V4SF code. */
31441 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
31442 gen_lowpart (V4SFmode, vec), elt);
31443 return;
31445 break;
31447 case V8HImode:
31448 use_vec_extr = TARGET_SSE2;
31449 break;
31450 case V4HImode:
31451 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31452 break;
31454 case V16QImode:
31455 use_vec_extr = TARGET_SSE4_1;
31456 break;
31458 case V8QImode:
31459 /* ??? Could extract the appropriate HImode element and shift. */
31460 default:
31461 break;
31464 if (use_vec_extr)
31466 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
31467 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
31469 /* Let the rtl optimizers know about the zero extension performed. */
31470 if (inner_mode == QImode || inner_mode == HImode)
31472 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
31473 target = gen_lowpart (SImode, target);
31476 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31478 else
31480 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31482 emit_move_insn (mem, vec);
31484 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31485 emit_move_insn (target, tmp);
31489 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
31490 pattern to reduce; DEST is the destination; IN is the input vector. */
31492 void
31493 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
31495 rtx tmp1, tmp2, tmp3;
31497 tmp1 = gen_reg_rtx (V4SFmode);
31498 tmp2 = gen_reg_rtx (V4SFmode);
31499 tmp3 = gen_reg_rtx (V4SFmode);
31501 emit_insn (gen_sse_movhlps (tmp1, in, in));
31502 emit_insn (fn (tmp2, tmp1, in));
31504 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
31505 const1_rtx, const1_rtx,
31506 GEN_INT (1+4), GEN_INT (1+4)));
31507 emit_insn (fn (dest, tmp2, tmp3));
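/* Illustrative C model of the reduction above (a sketch, not part of the
   GCC sources), with FN standing for the scalar version of the vertical
   SSE operation (plus, smin, smax; all commutative, which the pairing
   below relies on).  Only element 0 of DEST is meaningful afterwards.  */
static float
reduc_v4sf_sketch (float (*fn) (float, float), const float in[4])
{
  float a = fn (in[0], in[2]);   /* movhlps + fn combines lanes 0/2 and 1/3 */
  float b = fn (in[1], in[3]);
  return fn (a, b);              /* shufps + fn combines the two partials */
}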
31510 /* Target hook for scalar_mode_supported_p. */
31511 static bool
31512 ix86_scalar_mode_supported_p (enum machine_mode mode)
31514 if (DECIMAL_FLOAT_MODE_P (mode))
31515 return default_decimal_float_supported_p ();
31516 else if (mode == TFmode)
31517 return true;
31518 else
31519 return default_scalar_mode_supported_p (mode);
31522 /* Implements target hook vector_mode_supported_p. */
31523 static bool
31524 ix86_vector_mode_supported_p (enum machine_mode mode)
31526 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
31527 return true;
31528 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
31529 return true;
31530 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
31531 return true;
31532 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
31533 return true;
31534 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
31535 return true;
31536 return false;
31539 /* Target hook for c_mode_for_suffix. */
31540 static enum machine_mode
31541 ix86_c_mode_for_suffix (char suffix)
31543 if (suffix == 'q')
31544 return TFmode;
31545 if (suffix == 'w')
31546 return XFmode;
31548 return VOIDmode;
31551 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31553 We do this in the new i386 backend to maintain source compatibility
31554 with the old cc0-based compiler. */
31556 static tree
31557 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
31558 tree inputs ATTRIBUTE_UNUSED,
31559 tree clobbers)
31561 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
31562 clobbers);
31563 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
31564 clobbers);
31565 return clobbers;
31568 /* Implements target vector targetm.asm.encode_section_info. This
31569 is not used by netware. */
31571 static void ATTRIBUTE_UNUSED
31572 ix86_encode_section_info (tree decl, rtx rtl, int first)
31574 default_encode_section_info (decl, rtl, first);
31576 if (TREE_CODE (decl) == VAR_DECL
31577 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
31578 && ix86_in_large_data_p (decl))
31579 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
31582 /* Worker function for REVERSE_CONDITION. */
31584 enum rtx_code
31585 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
31587 return (mode != CCFPmode && mode != CCFPUmode
31588 ? reverse_condition (code)
31589 : reverse_condition_maybe_unordered (code));
31592 /* Output code to perform an x87 FP register move, from OPERANDS[1]
31593 to OPERANDS[0]. */
31595 const char *
31596 output_387_reg_move (rtx insn, rtx *operands)
31598 if (REG_P (operands[0]))
31600 if (REG_P (operands[1])
31601 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31603 if (REGNO (operands[0]) == FIRST_STACK_REG)
31604 return output_387_ffreep (operands, 0);
31605 return "fstp\t%y0";
31607 if (STACK_TOP_P (operands[0]))
31608 return "fld%Z1\t%y1";
31609 return "fst\t%y0";
31611 else if (MEM_P (operands[0]))
31613 gcc_assert (REG_P (operands[1]));
31614 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31615 return "fstp%Z0\t%y0";
31616 else
31618 /* There is no non-popping store to memory for XFmode.
31619 So if we need one, follow the store with a load. */
31620 if (GET_MODE (operands[0]) == XFmode)
31621 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31622 else
31623 return "fst%Z0\t%y0";
31626 else
31627 gcc_unreachable();
31630 /* Output code to perform a conditional jump to LABEL, if C2 flag in
31631 FP status register is set. */
31633 void
31634 ix86_emit_fp_unordered_jump (rtx label)
31636 rtx reg = gen_reg_rtx (HImode);
31637 rtx temp;
31639 emit_insn (gen_x86_fnstsw_1 (reg));
31641 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
31643 emit_insn (gen_x86_sahf_1 (reg));
31645 temp = gen_rtx_REG (CCmode, FLAGS_REG);
31646 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
31648 else
31650 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
31652 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
31653 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
31656 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
31657 gen_rtx_LABEL_REF (VOIDmode, label),
31658 pc_rtx);
31659 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
31661 emit_jump_insn (temp);
31662 predict_jump (REG_BR_PROB_BASE * 10 / 100);
31665 /* Output code to perform a log1p XFmode calculation. */
31667 void ix86_emit_i387_log1p (rtx op0, rtx op1)
31669 rtx label1 = gen_label_rtx ();
31670 rtx label2 = gen_label_rtx ();
31672 rtx tmp = gen_reg_rtx (XFmode);
31673 rtx tmp2 = gen_reg_rtx (XFmode);
31674 rtx test;
31676 emit_insn (gen_absxf2 (tmp, op1));
31677 test = gen_rtx_GE (VOIDmode, tmp,
31678 CONST_DOUBLE_FROM_REAL_VALUE (
31679 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
31680 XFmode));
31681 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
31683 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31684 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
31685 emit_jump (label2);
31687 emit_label (label1);
31688 emit_move_insn (tmp, CONST1_RTX (XFmode));
31689 emit_insn (gen_addxf3 (tmp, op1, tmp));
31690 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31691 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
31693 emit_label (label2);
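/* Illustrative C model of the expansion above (a sketch, not part of the
   GCC sources; x87_fyl2xp1 and x87_fyl2x are hypothetical stand-ins for the
   fyl2xp1 and fyl2x instructions, which compute y*log2(x+1) and y*log2(x)).
   fyl2xp1 is only specified for roughly |x| < 1 - sqrt(2)/2, hence the
   0.2928932... threshold; ln(2) is the fldln2 constant, and
   log1p(x) = ln(2) * log2(1 + x).  */
static long double
log1p_model (long double x,
             long double (*x87_fyl2xp1) (long double, long double),
             long double (*x87_fyl2x) (long double, long double))
{
  const long double ln2 = 0.693147180559945309417232121458L;   /* fldln2 */
  if (__builtin_fabsl (x) < 0.29289321881345247561810596348408353L)
    return x87_fyl2xp1 (ln2, x);        /* ln2 * log2 (x + 1), no 1+x formed */
  return x87_fyl2x (ln2, 1.0L + x);     /* ln2 * log2 (1 + x) */
}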
31696 /* Output code to perform a Newton-Raphson approximation of a single precision
31697 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
31699 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
31701 rtx x0, x1, e0, e1, two;
31703 x0 = gen_reg_rtx (mode);
31704 e0 = gen_reg_rtx (mode);
31705 e1 = gen_reg_rtx (mode);
31706 x1 = gen_reg_rtx (mode);
31708 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
31710 if (VECTOR_MODE_P (mode))
31711 two = ix86_build_const_vector (mode, true, two);
31713 two = force_reg (mode, two);
31715 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
31717 /* x0 = rcp(b) estimate */
31718 emit_insn (gen_rtx_SET (VOIDmode, x0,
31719 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
31720 UNSPEC_RCP)));
31721 /* e0 = x0 * a */
31722 emit_insn (gen_rtx_SET (VOIDmode, e0,
31723 gen_rtx_MULT (mode, x0, a)));
31724 /* e1 = x0 * b */
31725 emit_insn (gen_rtx_SET (VOIDmode, e1,
31726 gen_rtx_MULT (mode, x0, b)));
31727 /* x1 = 2. - e1 */
31728 emit_insn (gen_rtx_SET (VOIDmode, x1,
31729 gen_rtx_MINUS (mode, two, e1)));
31730 /* res = e0 * x1 */
31731 emit_insn (gen_rtx_SET (VOIDmode, res,
31732 gen_rtx_MULT (mode, e0, x1)));
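/* Illustrative C equivalent of the reciprocal refinement above (a sketch,
   not part of the GCC sources).  RCP_B is the hardware estimate of 1/b
   (rcpss/rcpps); one Newton-Raphson step x1 = x0 * (2 - b*x0) roughly
   doubles the number of correct bits, and multiplying through by a gives
   a/b ~= (a*x0) * (2 - b*x0), exactly the shape emitted above.  */
static float
swdiv_sketch (float a, float b, float rcp_b)
{
  float x0 = rcp_b;
  float e0 = x0 * a;          /* a * rcp(b) */
  float e1 = x0 * b;          /* b * rcp(b), close to 1.0 */
  float x1 = 2.0f - e1;       /* Newton-Raphson correction factor */
  return e0 * x1;             /* refined a / b */
}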
31735 /* Output code to perform a Newton-Raphson approximation of a
31736 single precision floating point [reciprocal] square root. */
31738 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
31739 bool recip)
31741 rtx x0, e0, e1, e2, e3, mthree, mhalf;
31742 REAL_VALUE_TYPE r;
31744 x0 = gen_reg_rtx (mode);
31745 e0 = gen_reg_rtx (mode);
31746 e1 = gen_reg_rtx (mode);
31747 e2 = gen_reg_rtx (mode);
31748 e3 = gen_reg_rtx (mode);
31750 real_from_integer (&r, VOIDmode, -3, -1, 0);
31751 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31753 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
31754 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31756 if (VECTOR_MODE_P (mode))
31758 mthree = ix86_build_const_vector (mode, true, mthree);
31759 mhalf = ix86_build_const_vector (mode, true, mhalf);
31762 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
31763 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
31765 /* x0 = rsqrt(a) estimate */
31766 emit_insn (gen_rtx_SET (VOIDmode, x0,
31767 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
31768 UNSPEC_RSQRT)));
31770 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent NaN for sqrt (0.0). */
31771 if (!recip)
31773 rtx zero, mask;
31775 zero = gen_reg_rtx (mode);
31776 mask = gen_reg_rtx (mode);
31778 zero = force_reg (mode, CONST0_RTX(mode));
31779 emit_insn (gen_rtx_SET (VOIDmode, mask,
31780 gen_rtx_NE (mode, zero, a)));
31782 emit_insn (gen_rtx_SET (VOIDmode, x0,
31783 gen_rtx_AND (mode, x0, mask)));
31786 /* e0 = x0 * a */
31787 emit_insn (gen_rtx_SET (VOIDmode, e0,
31788 gen_rtx_MULT (mode, x0, a)));
31789 /* e1 = e0 * x0 */
31790 emit_insn (gen_rtx_SET (VOIDmode, e1,
31791 gen_rtx_MULT (mode, e0, x0)));
31793 /* e2 = e1 - 3. */
31794 mthree = force_reg (mode, mthree);
31795 emit_insn (gen_rtx_SET (VOIDmode, e2,
31796 gen_rtx_PLUS (mode, e1, mthree)));
31798 mhalf = force_reg (mode, mhalf);
31799 if (recip)
31800 /* e3 = -.5 * x0 */
31801 emit_insn (gen_rtx_SET (VOIDmode, e3,
31802 gen_rtx_MULT (mode, x0, mhalf)));
31803 else
31804 /* e3 = -.5 * e0 */
31805 emit_insn (gen_rtx_SET (VOIDmode, e3,
31806 gen_rtx_MULT (mode, e0, mhalf)));
31807 /* ret = e2 * e3 */
31808 emit_insn (gen_rtx_SET (VOIDmode, res,
31809 gen_rtx_MULT (mode, e2, e3)));
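/* Illustrative C equivalent of the sequence above (a sketch, not part of
   the GCC sources).  RSQRT_A is the hardware estimate of 1/sqrt(a)
   (rsqrtss/rsqrtps); one Newton-Raphson step, arranged to use only
   multiplies plus the -3.0 and -0.5 constants:
     rsqrt(a) ~= -0.5 *  x0    * (a*x0*x0 - 3.0)
     sqrt(a)  ~= -0.5 * (a*x0) * (a*x0*x0 - 3.0)
   The a == 0.0 masking done above for the sqrt case (avoiding 0 * inf)
   is omitted here.  */
static float
swsqrt_sketch (float a, float rsqrt_a, int recip)
{
  float x0 = rsqrt_a;
  float e0 = x0 * a;
  float e1 = e0 * x0;                     /* a * x0 * x0 */
  float e2 = e1 + -3.0f;
  float e3 = (recip ? x0 : e0) * -0.5f;
  return e2 * e3;
}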
31812 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
31814 static void ATTRIBUTE_UNUSED
31815 i386_solaris_elf_named_section (const char *name, unsigned int flags,
31816 tree decl)
31818 /* With Binutils 2.15, the "@unwind" marker must be specified on
31819 every occurrence of the ".eh_frame" section, not just the first
31820 one. */
31821 if (TARGET_64BIT
31822 && strcmp (name, ".eh_frame") == 0)
31824 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
31825 flags & SECTION_WRITE ? "aw" : "a");
31826 return;
31828 default_elf_asm_named_section (name, flags, decl);
31831 /* Return the mangling of TYPE if it is an extended fundamental type. */
31833 static const char *
31834 ix86_mangle_type (const_tree type)
31836 type = TYPE_MAIN_VARIANT (type);
31838 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
31839 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
31840 return NULL;
31842 switch (TYPE_MODE (type))
31844 case TFmode:
31845 /* __float128 is "g". */
31846 return "g";
31847 case XFmode:
31848 /* "long double" or __float80 is "e". */
31849 return "e";
31850 default:
31851 return NULL;
31855 /* For 32-bit code we can save PIC register setup by using
31856 the __stack_chk_fail_local hidden function instead of calling
31857 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
31858 register, so it is better to call __stack_chk_fail directly. */
31860 static tree
31861 ix86_stack_protect_fail (void)
31863 return TARGET_64BIT
31864 ? default_external_stack_protect_fail ()
31865 : default_hidden_stack_protect_fail ();
31868 /* Select a format to encode pointers in exception handling data. CODE
31869 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
31870 true if the symbol may be affected by dynamic relocations.
31872 ??? All x86 object file formats are capable of representing this.
31873 After all, the relocation needed is the same as for the call insn.
31874 Whether or not a particular assembler allows us to enter such, I
31875 guess we'll have to see. */
31876 int
31877 asm_preferred_eh_data_format (int code, int global)
31879 if (flag_pic)
31881 int type = DW_EH_PE_sdata8;
31882 if (!TARGET_64BIT
31883 || ix86_cmodel == CM_SMALL_PIC
31884 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
31885 type = DW_EH_PE_sdata4;
31886 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
31888 if (ix86_cmodel == CM_SMALL
31889 || (ix86_cmodel == CM_MEDIUM && code))
31890 return DW_EH_PE_udata4;
31891 return DW_EH_PE_absptr;
31894 /* Expand copysign from SIGN to the positive value ABS_VALUE
31895 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
31896 the sign-bit. */
31897 static void
31898 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
31900 enum machine_mode mode = GET_MODE (sign);
31901 rtx sgn = gen_reg_rtx (mode);
31902 if (mask == NULL_RTX)
31904 enum machine_mode vmode;
31906 if (mode == SFmode)
31907 vmode = V4SFmode;
31908 else if (mode == DFmode)
31909 vmode = V2DFmode;
31910 else
31911 vmode = mode;
31913 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
31914 if (!VECTOR_MODE_P (mode))
31916 /* We need to generate a scalar mode mask in this case. */
31917 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31918 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31919 mask = gen_reg_rtx (mode);
31920 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31923 else
31924 mask = gen_rtx_NOT (mode, mask);
31925 emit_insn (gen_rtx_SET (VOIDmode, sgn,
31926 gen_rtx_AND (mode, mask, sign)));
31927 emit_insn (gen_rtx_SET (VOIDmode, result,
31928 gen_rtx_IOR (mode, abs_value, sgn)));
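/* Illustrative C model of the mask-based copysign above (a sketch, not
   part of the GCC sources; assumes a 32-bit unsigned int).  ABS_VALUE is
   expected to already have a clear sign bit, so the result is simply
   abs_value | (sign & sign-bit-mask).  */
static float
copysign_to_positive_sketch (float abs_value, float sign)
{
  unsigned int a, s;
  float r;
  __builtin_memcpy (&a, &abs_value, sizeof a);
  __builtin_memcpy (&s, &sign, sizeof s);
  a |= s & 0x80000000u;                        /* copy only the sign bit */
  __builtin_memcpy (&r, &a, sizeof r);
  return r;
}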
31931 /* Expand fabs (OP0) and return a new rtx that holds the result. The
31932 mask for masking out the sign-bit is stored in *SMASK, if that is
31933 non-null. */
31934 static rtx
31935 ix86_expand_sse_fabs (rtx op0, rtx *smask)
31937 enum machine_mode vmode, mode = GET_MODE (op0);
31938 rtx xa, mask;
31940 xa = gen_reg_rtx (mode);
31941 if (mode == SFmode)
31942 vmode = V4SFmode;
31943 else if (mode == DFmode)
31944 vmode = V2DFmode;
31945 else
31946 vmode = mode;
31947 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
31948 if (!VECTOR_MODE_P (mode))
31950 /* We need to generate a scalar mode mask in this case. */
31951 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31952 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31953 mask = gen_reg_rtx (mode);
31954 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31956 emit_insn (gen_rtx_SET (VOIDmode, xa,
31957 gen_rtx_AND (mode, op0, mask)));
31959 if (smask)
31960 *smask = mask;
31962 return xa;
31965 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
31966 swapping the operands if SWAP_OPERANDS is true. The expanded
31967 code is a forward jump to a newly created label in case the
31968 comparison is true. The generated label rtx is returned. */
31969 static rtx
31970 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
31971 bool swap_operands)
31973 rtx label, tmp;
31975 if (swap_operands)
31977 tmp = op0;
31978 op0 = op1;
31979 op1 = tmp;
31982 label = gen_label_rtx ();
31983 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
31984 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31985 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
31986 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
31987 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
31988 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
31989 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
31990 JUMP_LABEL (tmp) = label;
31992 return label;
31995 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
31996 using comparison code CODE. Operands are swapped for the comparison if
31997 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
31998 static rtx
31999 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
32000 bool swap_operands)
32002 enum machine_mode mode = GET_MODE (op0);
32003 rtx mask = gen_reg_rtx (mode);
32005 if (swap_operands)
32007 rtx tmp = op0;
32008 op0 = op1;
32009 op1 = tmp;
32012 if (mode == DFmode)
32013 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
32014 gen_rtx_fmt_ee (code, mode, op0, op1)));
32015 else
32016 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
32017 gen_rtx_fmt_ee (code, mode, op0, op1)));
32019 return mask;
32022 /* Generate and return a rtx of mode MODE for 2**n where n is the number
32023 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
32024 static rtx
32025 ix86_gen_TWO52 (enum machine_mode mode)
32027 REAL_VALUE_TYPE TWO52r;
32028 rtx TWO52;
32030 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
32031 TWO52 = const_double_from_real_value (TWO52r, mode);
32032 TWO52 = force_reg (mode, TWO52);
32034 return TWO52;
32037 /* Expand SSE sequence for computing lround from OP1 storing
32038 into OP0. */
32039 void
32040 ix86_expand_lround (rtx op0, rtx op1)
32042 /* C code for the stuff we're doing below:
32043 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
32044 return (long)tmp;
32046 enum machine_mode mode = GET_MODE (op1);
32047 const struct real_format *fmt;
32048 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32049 rtx adj;
32051 /* load nextafter (0.5, 0.0) */
32052 fmt = REAL_MODE_FORMAT (mode);
32053 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32054 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32056 /* adj = copysign (0.5, op1) */
32057 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
32058 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
32060 /* adj = op1 + adj */
32061 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
32063 /* op0 = (imode)adj */
32064 expand_fix (op0, adj, 0);
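/* Illustrative note (not from the GCC sources) on why the addend is
   nextafter (0.5, 0.0) rather than 0.5: for the largest double below 0.5,
   adding exactly 0.5 rounds the sum up to 1.0, so truncation would give 1
   instead of 0.  A quick sketch of the scheme above:  */
static long
lround_sketch (double x)
{
  double pred_half = __builtin_nextafter (0.5, 0.0);  /* 0.49999999999999994 */
  /* e.g. lround_sketch (0.49999999999999994) is 0, not 1, while
     lround_sketch (2.5) still rounds away from zero to 3, because
     2.5 + pred_half rounds up to 3.0 in the addition.  */
  return (long) (x + __builtin_copysign (pred_half, x));
}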
32067 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
32068 into OPERAND0. */
32069 void
32070 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
32072 /* C code for the stuff we're doing below (for do_floor):
32073 xi = (long)op1;
32074 xi -= (double)xi > op1 ? 1 : 0;
32075 return xi;
32077 enum machine_mode fmode = GET_MODE (op1);
32078 enum machine_mode imode = GET_MODE (op0);
32079 rtx ireg, freg, label, tmp;
32081 /* reg = (long)op1 */
32082 ireg = gen_reg_rtx (imode);
32083 expand_fix (ireg, op1, 0);
32085 /* freg = (double)reg */
32086 freg = gen_reg_rtx (fmode);
32087 expand_float (freg, ireg, 0);
32089 /* ireg = (freg > op1) ? ireg - 1 : ireg */
32090 label = ix86_expand_sse_compare_and_jump (UNLE,
32091 freg, op1, !do_floor);
32092 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
32093 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
32094 emit_move_insn (ireg, tmp);
32096 emit_label (label);
32097 LABEL_NUSES (label) = 1;
32099 emit_move_insn (op0, ireg);
32102 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
32103 result in OPERAND0. */
32104 void
32105 ix86_expand_rint (rtx operand0, rtx operand1)
32107 /* C code for the stuff we're doing below:
32108 xa = fabs (operand1);
32109 if (!isless (xa, 2**52))
32110 return operand1;
32111 xa = xa + 2**52 - 2**52;
32112 return copysign (xa, operand1);
32114 enum machine_mode mode = GET_MODE (operand0);
32115 rtx res, xa, label, TWO52, mask;
32117 res = gen_reg_rtx (mode);
32118 emit_move_insn (res, operand1);
32120 /* xa = abs (operand1) */
32121 xa = ix86_expand_sse_fabs (res, &mask);
32123 /* if (!isless (xa, TWO52)) goto label; */
32124 TWO52 = ix86_gen_TWO52 (mode);
32125 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32127 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32128 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32130 ix86_sse_copysign_to_positive (res, xa, res, mask);
32132 emit_label (label);
32133 LABEL_NUSES (label) = 1;
32135 emit_move_insn (operand0, res);
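/* Illustrative C equivalent of the expansion above (a sketch, not part of
   the GCC sources; relies on the default round-to-nearest mode and must
   not be compiled with -ffast-math, which could fold the add/subtract
   away).  Adding and subtracting 2**52 forces any |x| < 2**52 to an
   integer, because the intermediate sum has no fraction bits left; the
   sign is reapplied afterwards so that -0.0 and negative inputs come out
   right.  */
static double
rint_sketch (double x)
{
  const double two52 = 4503599627370496.0;     /* 2**52 */
  double xa = __builtin_fabs (x);
  if (!(xa < two52))
    return x;                                  /* already integral, or NaN */
  xa = (xa + two52) - two52;                   /* round to nearest integer */
  return __builtin_copysign (xa, x);
}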
32138 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
32139 into OPERAND0. */
32140 void
32141 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
32143 /* C code for the stuff we expand below.
32144 double xa = fabs (x), x2;
32145 if (!isless (xa, TWO52))
32146 return x;
32147 xa = xa + TWO52 - TWO52;
32148 x2 = copysign (xa, x);
32149 Compensate. Floor:
32150 if (x2 > x)
32151 x2 -= 1;
32152 Compensate. Ceil:
32153 if (x2 < x)
32154 x2 -= -1;
32155 return x2;
32157 enum machine_mode mode = GET_MODE (operand0);
32158 rtx xa, TWO52, tmp, label, one, res, mask;
32160 TWO52 = ix86_gen_TWO52 (mode);
32162 /* Temporary for holding the result, initialized to the input
32163 operand to ease control flow. */
32164 res = gen_reg_rtx (mode);
32165 emit_move_insn (res, operand1);
32167 /* xa = abs (operand1) */
32168 xa = ix86_expand_sse_fabs (res, &mask);
32170 /* if (!isless (xa, TWO52)) goto label; */
32171 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32173 /* xa = xa + TWO52 - TWO52; */
32174 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32175 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32177 /* xa = copysign (xa, operand1) */
32178 ix86_sse_copysign_to_positive (xa, xa, res, mask);
32180 /* generate 1.0 or -1.0 */
32181 one = force_reg (mode,
32182 const_double_from_real_value (do_floor
32183 ? dconst1 : dconstm1, mode));
32185 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32186 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32187 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32188 gen_rtx_AND (mode, one, tmp)));
32189 /* We always need to subtract here to preserve signed zero. */
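/* In round-to-nearest, (-0.0) + (+0.0) yields +0.0 while (-0.0) - (+0.0)
   stays -0.0, so the no-adjustment case has to go through MINUS; for the
   ceil direction ONE is -1.0, which turns the subtraction into the +1
   adjustment.  */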
32190 tmp = expand_simple_binop (mode, MINUS,
32191 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32192 emit_move_insn (res, tmp);
32194 emit_label (label);
32195 LABEL_NUSES (label) = 1;
32197 emit_move_insn (operand0, res);
32200 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
32201 into OPERAND0. */
32202 void
32203 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
32205 /* C code for the stuff we expand below.
32206 double xa = fabs (x), x2;
32207 if (!isless (xa, TWO52))
32208 return x;
32209 x2 = (double)(long)x;
32210 Compensate. Floor:
32211 if (x2 > x)
32212 x2 -= 1;
32213 Compensate. Ceil:
32214 if (x2 < x)
32215 x2 += 1;
32216 if (HONOR_SIGNED_ZEROS (mode))
32217 return copysign (x2, x);
32218 return x2;
32220 enum machine_mode mode = GET_MODE (operand0);
32221 rtx xa, xi, TWO52, tmp, label, one, res, mask;
32223 TWO52 = ix86_gen_TWO52 (mode);
32225 /* Temporary for holding the result, initialized to the input
32226 operand to ease control flow. */
32227 res = gen_reg_rtx (mode);
32228 emit_move_insn (res, operand1);
32230 /* xa = abs (operand1) */
32231 xa = ix86_expand_sse_fabs (res, &mask);
32233 /* if (!isless (xa, TWO52)) goto label; */
32234 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32236 /* xa = (double)(long)x */
32237 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32238 expand_fix (xi, res, 0);
32239 expand_float (xa, xi, 0);
32241 /* generate 1.0 */
32242 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32244 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32245 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32246 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32247 gen_rtx_AND (mode, one, tmp)));
32248 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
32249 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32250 emit_move_insn (res, tmp);
32252 if (HONOR_SIGNED_ZEROS (mode))
32253 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32255 emit_label (label);
32256 LABEL_NUSES (label) = 1;
32258 emit_move_insn (operand0, res);
32261 /* Expand SSE sequence for computing round from OPERAND1 storing
32262 into OPERAND0. Sequence that works without relying on DImode truncation
32263 via cvttsd2siq, which is only available on 64bit targets. */
32264 void
32265 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
32267 /* C code for the stuff we expand below.
32268 double xa = fabs (x), xa2, x2;
32269 if (!isless (xa, TWO52))
32270 return x;
32271 Using the absolute value and copying back sign makes
32272 -0.0 -> -0.0 correct.
32273 xa2 = xa + TWO52 - TWO52;
32274 Compensate.
32275 dxa = xa2 - xa;
32276 if (dxa <= -0.5)
32277 xa2 += 1;
32278 else if (dxa > 0.5)
32279 xa2 -= 1;
32280 x2 = copysign (xa2, x);
32281 return x2;
32283 enum machine_mode mode = GET_MODE (operand0);
32284 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
32286 TWO52 = ix86_gen_TWO52 (mode);
32288 /* Temporary for holding the result, initialized to the input
32289 operand to ease control flow. */
32290 res = gen_reg_rtx (mode);
32291 emit_move_insn (res, operand1);
32293 /* xa = abs (operand1) */
32294 xa = ix86_expand_sse_fabs (res, &mask);
32296 /* if (!isless (xa, TWO52)) goto label; */
32297 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32299 /* xa2 = xa + TWO52 - TWO52; */
32300 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32301 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
32303 /* dxa = xa2 - xa; */
32304 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
32306 /* generate 0.5, 1.0 and -0.5 */
32307 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
32308 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
32309 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
32310 0, OPTAB_DIRECT);
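/* ONE is 0.5 + 0.5 and MHALF is 0.5 - 1.0, i.e. -0.5; deriving them
   arithmetically presumably means only the single 0.5 constant has to be
   materialized.  */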
32312 /* Compensate. */
32313 tmp = gen_reg_rtx (mode);
32314 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
32315 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
32316 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32317 gen_rtx_AND (mode, one, tmp)));
32318 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32319 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
32320 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
32321 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32322 gen_rtx_AND (mode, one, tmp)));
32323 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32325 /* res = copysign (xa2, operand1) */
32326 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
32328 emit_label (label);
32329 LABEL_NUSES (label) = 1;
32331 emit_move_insn (operand0, res);
32334 /* Expand SSE sequence for computing trunc from OPERAND1 storing
32335 into OPERAND0. */
32336 void
32337 ix86_expand_trunc (rtx operand0, rtx operand1)
32339 /* C code for SSE variant we expand below.
32340 double xa = fabs (x), x2;
32341 if (!isless (xa, TWO52))
32342 return x;
32343 x2 = (double)(long)x;
32344 if (HONOR_SIGNED_ZEROS (mode))
32345 return copysign (x2, x);
32346 return x2;
32348 enum machine_mode mode = GET_MODE (operand0);
32349 rtx xa, xi, TWO52, label, res, mask;
32351 TWO52 = ix86_gen_TWO52 (mode);
32353 /* Temporary for holding the result, initialized to the input
32354 operand to ease control flow. */
32355 res = gen_reg_rtx (mode);
32356 emit_move_insn (res, operand1);
32358 /* xa = abs (operand1) */
32359 xa = ix86_expand_sse_fabs (res, &mask);
32361 /* if (!isless (xa, TWO52)) goto label; */
32362 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32364 /* x = (double)(long)x */
32365 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32366 expand_fix (xi, res, 0);
32367 expand_float (res, xi, 0);
32369 if (HONOR_SIGNED_ZEROS (mode))
32370 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32372 emit_label (label);
32373 LABEL_NUSES (label) = 1;
32375 emit_move_insn (operand0, res);
32378 /* Expand SSE sequence for computing trunc from OPERAND1 storing
32379 into OPERAND0. */
32380 void
32381 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
32383 enum machine_mode mode = GET_MODE (operand0);
32384 rtx xa, mask, TWO52, label, one, res, smask, tmp;
32386 /* C code for SSE variant we expand below.
32387 double xa = fabs (x), x2;
32388 if (!isless (xa, TWO52))
32389 return x;
32390 xa2 = xa + TWO52 - TWO52;
32391 Compensate:
32392 if (xa2 > xa)
32393 xa2 -= 1.0;
32394 x2 = copysign (xa2, x);
32395 return x2;
32398 TWO52 = ix86_gen_TWO52 (mode);
32400 /* Temporary for holding the result, initialized to the input
32401 operand to ease control flow. */
32402 res = gen_reg_rtx (mode);
32403 emit_move_insn (res, operand1);
32405 /* xa = abs (operand1) */
32406 xa = ix86_expand_sse_fabs (res, &smask);
32408 /* if (!isless (xa, TWO52)) goto label; */
32409 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32411 /* res = xa + TWO52 - TWO52; */
32412 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32413 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
32414 emit_move_insn (res, tmp);
32416 /* generate 1.0 */
32417 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32419 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
32420 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
32421 emit_insn (gen_rtx_SET (VOIDmode, mask,
32422 gen_rtx_AND (mode, mask, one)));
32423 tmp = expand_simple_binop (mode, MINUS,
32424 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
32425 emit_move_insn (res, tmp);
32427 /* res = copysign (res, operand1) */
32428 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
32430 emit_label (label);
32431 LABEL_NUSES (label) = 1;
32433 emit_move_insn (operand0, res);
32436 /* Expand SSE sequence for computing round from OPERAND1 storing
32437 into OPERAND0. */
32438 void
32439 ix86_expand_round (rtx operand0, rtx operand1)
32441 /* C code for the stuff we're doing below:
32442 double xa = fabs (x);
32443 if (!isless (xa, TWO52))
32444 return x;
32445 xa = (double)(long)(xa + nextafter (0.5, 0.0));
32446 return copysign (xa, x);
32448 enum machine_mode mode = GET_MODE (operand0);
32449 rtx res, TWO52, xa, label, xi, half, mask;
32450 const struct real_format *fmt;
32451 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32453 /* Temporary for holding the result, initialized to the input
32454 operand to ease control flow. */
32455 res = gen_reg_rtx (mode);
32456 emit_move_insn (res, operand1);
32458 TWO52 = ix86_gen_TWO52 (mode);
32459 xa = ix86_expand_sse_fabs (res, &mask);
32460 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32462 /* load nextafter (0.5, 0.0) */
32463 fmt = REAL_MODE_FORMAT (mode);
32464 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32465 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
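/* Adding exactly 0.5 would be wrong for inputs just below one half: for
   the largest double smaller than 0.5 the sum rounds up to 1.0 before the
   truncation, giving 1 instead of 0.  Using the predecessor of 0.5 as the
   addend avoids that.  */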
32467 /* xa = xa + 0.5 */
32468 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
32469 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
32471 /* xa = (double)(int64_t)xa */
32472 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32473 expand_fix (xi, xa, 0);
32474 expand_float (xa, xi, 0);
32476 /* res = copysign (xa, operand1) */
32477 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
32479 emit_label (label);
32480 LABEL_NUSES (label) = 1;
32482 emit_move_insn (operand0, res);
32486 /* Table of valid machine attributes. */
32487 static const struct attribute_spec ix86_attribute_table[] =
32489 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
32490 /* Stdcall attribute says callee is responsible for popping arguments
32491 if they are not variable. */
32492 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32493 /* Fastcall attribute says callee is responsible for popping arguments
32494 if they are not variable. */
32495 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32496 /* Thiscall attribute says callee is responsible for popping arguments
32497 if they are not variable. */
32498 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32499 /* Cdecl attribute says the callee is a normal C declaration */
32500 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32501 /* Regparm attribute specifies how many integer arguments are to be
32502 passed in registers. */
32503 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
32504 /* Sseregparm attribute says we are using x86_64 calling conventions
32505 for FP arguments. */
32506 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32507 /* force_align_arg_pointer says this function realigns the stack at entry. */
32508 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
32509 false, true, true, ix86_handle_cconv_attribute },
32510 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32511 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
32512 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
32513 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
32514 #endif
32515 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
32516 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
32517 #ifdef SUBTARGET_ATTRIBUTE_TABLE
32518 SUBTARGET_ATTRIBUTE_TABLE,
32519 #endif
32520 /* ms_abi and sysv_abi calling convention function attributes. */
32521 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
32522 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
32523 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
32524 { "callee_pop_aggregate_return", 1, 1, false, true, true,
32525 ix86_handle_callee_pop_aggregate_return },
32526 /* End element. */
32527 { NULL, 0, 0, false, false, false, NULL }
32530 /* Implement targetm.vectorize.builtin_vectorization_cost. */
32531 static int
32532 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
32533 tree vectype ATTRIBUTE_UNUSED,
32534 int misalign ATTRIBUTE_UNUSED)
32536 switch (type_of_cost)
32538 case scalar_stmt:
32539 return ix86_cost->scalar_stmt_cost;
32541 case scalar_load:
32542 return ix86_cost->scalar_load_cost;
32544 case scalar_store:
32545 return ix86_cost->scalar_store_cost;
32547 case vector_stmt:
32548 return ix86_cost->vec_stmt_cost;
32550 case vector_load:
32551 return ix86_cost->vec_align_load_cost;
32553 case vector_store:
32554 return ix86_cost->vec_store_cost;
32556 case vec_to_scalar:
32557 return ix86_cost->vec_to_scalar_cost;
32559 case scalar_to_vec:
32560 return ix86_cost->scalar_to_vec_cost;
32562 case unaligned_load:
32563 case unaligned_store:
32564 return ix86_cost->vec_unalign_load_cost;
32566 case cond_branch_taken:
32567 return ix86_cost->cond_taken_branch_cost;
32569 case cond_branch_not_taken:
32570 return ix86_cost->cond_not_taken_branch_cost;
32572 case vec_perm:
32573 return 1;
32575 default:
32576 gcc_unreachable ();
32581 /* Implement targetm.vectorize.builtin_vec_perm. */
32583 static tree
32584 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
32586 tree itype = TREE_TYPE (vec_type);
32587 bool u = TYPE_UNSIGNED (itype);
32588 enum machine_mode vmode = TYPE_MODE (vec_type);
32589 enum ix86_builtins fcode;
32590 bool ok = TARGET_SSE2;
32592 switch (vmode)
32594 case V4DFmode:
32595 ok = TARGET_AVX;
32596 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
32597 goto get_di;
32598 case V2DFmode:
32599 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
32600 get_di:
32601 itype = ix86_get_builtin_type (IX86_BT_DI);
32602 break;
32604 case V8SFmode:
32605 ok = TARGET_AVX;
32606 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
32607 goto get_si;
32608 case V4SFmode:
32609 ok = TARGET_SSE;
32610 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
32611 get_si:
32612 itype = ix86_get_builtin_type (IX86_BT_SI);
32613 break;
32615 case V2DImode:
32616 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
32617 break;
32618 case V4SImode:
32619 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
32620 break;
32621 case V8HImode:
32622 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
32623 break;
32624 case V16QImode:
32625 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
32626 break;
32627 default:
32628 ok = false;
32629 break;
32632 if (!ok)
32633 return NULL_TREE;
32635 *mask_type = itype;
32636 return ix86_builtins[(int) fcode];
32639 /* Return a vector mode with twice as many elements as VMODE. */
32640 /* ??? Consider moving this to a table generated by genmodes.c. */
32642 static enum machine_mode
32643 doublesize_vector_mode (enum machine_mode vmode)
32645 switch (vmode)
32647 case V2SFmode: return V4SFmode;
32648 case V1DImode: return V2DImode;
32649 case V2SImode: return V4SImode;
32650 case V4HImode: return V8HImode;
32651 case V8QImode: return V16QImode;
32653 case V2DFmode: return V4DFmode;
32654 case V4SFmode: return V8SFmode;
32655 case V2DImode: return V4DImode;
32656 case V4SImode: return V8SImode;
32657 case V8HImode: return V16HImode;
32658 case V16QImode: return V32QImode;
32660 case V4DFmode: return V8DFmode;
32661 case V8SFmode: return V16SFmode;
32662 case V4DImode: return V8DImode;
32663 case V8SImode: return V16SImode;
32664 case V16HImode: return V32HImode;
32665 case V32QImode: return V64QImode;
32667 default:
32668 gcc_unreachable ();
32672 /* Construct (set target (vec_select op0 (parallel perm))) and
32673 return true if that's a valid instruction in the active ISA. */
32675 static bool
32676 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
32678 rtx rperm[MAX_VECT_LEN], x;
32679 unsigned i;
32681 for (i = 0; i < nelt; ++i)
32682 rperm[i] = GEN_INT (perm[i]);
32684 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
32685 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
32686 x = gen_rtx_SET (VOIDmode, target, x);
32688 x = emit_insn (x);
32689 if (recog_memoized (x) < 0)
32691 remove_insn (x);
32692 return false;
32694 return true;
32697 /* Similar, but generate a vec_concat from op0 and op1 as well. */
32699 static bool
32700 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
32701 const unsigned char *perm, unsigned nelt)
32703 enum machine_mode v2mode;
32704 rtx x;
32706 v2mode = doublesize_vector_mode (GET_MODE (op0));
32707 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
32708 return expand_vselect (target, x, perm, nelt);
32711 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32712 in terms of blendp[sd] / pblendw / pblendvb. */
32714 static bool
32715 expand_vec_perm_blend (struct expand_vec_perm_d *d)
32717 enum machine_mode vmode = d->vmode;
32718 unsigned i, mask, nelt = d->nelt;
32719 rtx target, op0, op1, x;
32721 if (!TARGET_SSE4_1 || d->op0 == d->op1)
32722 return false;
32723 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
32724 return false;
32726 /* This is a blend, not a permute. Elements must stay in their
32727 respective lanes. */
32728 for (i = 0; i < nelt; ++i)
32730 unsigned e = d->perm[i];
32731 if (!(e == i || e == i + nelt))
32732 return false;
32735 if (d->testing_p)
32736 return true;
32738 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
32739 decision should be extracted elsewhere, so that we only try that
32740 sequence once all budget==3 options have been tried. */
32742 /* For bytes, see if bytes move in pairs so we can use pblendw with
32743 an immediate argument, rather than pblendvb with a vector argument. */
32744 if (vmode == V16QImode)
32746 bool pblendw_ok = true;
32747 for (i = 0; i < 16 && pblendw_ok; i += 2)
32748 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
32750 if (!pblendw_ok)
32752 rtx rperm[16], vperm;
32754 for (i = 0; i < nelt; ++i)
32755 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
32757 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32758 vperm = force_reg (V16QImode, vperm);
32760 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
32761 return true;
32765 target = d->target;
32766 op0 = d->op0;
32767 op1 = d->op1;
32768 mask = 0;
32770 switch (vmode)
32772 case V4DFmode:
32773 case V8SFmode:
32774 case V2DFmode:
32775 case V4SFmode:
32776 case V8HImode:
32777 for (i = 0; i < nelt; ++i)
32778 mask |= (d->perm[i] >= nelt) << i;
32779 break;
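/* SSE4.1 has no doubleword or quadword integer blend, so the remaining
   integer cases are expressed as an equivalent pblendw mask and performed
   on the operands viewed as V8HImode.  */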
32781 case V2DImode:
32782 for (i = 0; i < 2; ++i)
32783 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
32784 goto do_subreg;
32786 case V4SImode:
32787 for (i = 0; i < 4; ++i)
32788 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
32789 goto do_subreg;
32791 case V16QImode:
32792 for (i = 0; i < 8; ++i)
32793 mask |= (d->perm[i * 2] >= 16) << i;
32795 do_subreg:
32796 vmode = V8HImode;
32797 target = gen_lowpart (vmode, target);
32798 op0 = gen_lowpart (vmode, op0);
32799 op1 = gen_lowpart (vmode, op1);
32800 break;
32802 default:
32803 gcc_unreachable ();
32806 /* This matches five different patterns with the different modes. */
32807 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
32808 x = gen_rtx_SET (VOIDmode, target, x);
32809 emit_insn (x);
32811 return true;
32814 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32815 in terms of the variable form of vpermilps.
32817 Note that we will have already failed the immediate input vpermilps,
32818 which requires that the high and low part shuffle be identical; the
32819 variable form doesn't require that. */
32821 static bool
32822 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
32824 rtx rperm[8], vperm;
32825 unsigned i;
32827 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
32828 return false;
32830 /* We can only permute within the 128-bit lane. */
32831 for (i = 0; i < 8; ++i)
32833 unsigned e = d->perm[i];
32834 if (i < 4 ? e >= 4 : e < 4)
32835 return false;
32838 if (d->testing_p)
32839 return true;
32841 for (i = 0; i < 8; ++i)
32843 unsigned e = d->perm[i];
32845 /* Within each 128-bit lane, the elements of op0 are numbered
32846 from 0 and the elements of op1 are numbered from 4. */
32847 if (e >= 8 + 4)
32848 e -= 8;
32849 else if (e >= 4)
32850 e -= 4;
32852 rperm[i] = GEN_INT (e);
32855 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
32856 vperm = force_reg (V8SImode, vperm);
32857 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
32859 return true;
32862 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32863 in terms of pshufb or vpperm. */
32865 static bool
32866 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
32868 unsigned i, nelt, eltsz;
32869 rtx rperm[16], vperm, target, op0, op1;
32871 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
32872 return false;
32873 if (GET_MODE_SIZE (d->vmode) != 16)
32874 return false;
32876 if (d->testing_p)
32877 return true;
32879 nelt = d->nelt;
32880 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
32882 for (i = 0; i < nelt; ++i)
32884 unsigned j, e = d->perm[i];
32885 for (j = 0; j < eltsz; ++j)
32886 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
32889 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32890 vperm = force_reg (V16QImode, vperm);
32892 target = gen_lowpart (V16QImode, d->target);
32893 op0 = gen_lowpart (V16QImode, d->op0);
32894 if (d->op0 == d->op1)
32895 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
32896 else
32898 op1 = gen_lowpart (V16QImode, d->op1);
32899 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
32902 return true;
32905 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
32906 in a single instruction. */
32908 static bool
32909 expand_vec_perm_1 (struct expand_vec_perm_d *d)
32911 unsigned i, nelt = d->nelt;
32912 unsigned char perm2[MAX_VECT_LEN];
32914 /* Check plain VEC_SELECT first, because AVX has instructions that could
32915 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
32916 input where SEL+CONCAT may not. */
32917 if (d->op0 == d->op1)
32919 int mask = nelt - 1;
32921 for (i = 0; i < nelt; i++)
32922 perm2[i] = d->perm[i] & mask;
32924 if (expand_vselect (d->target, d->op0, perm2, nelt))
32925 return true;
32927 /* There are plenty of patterns in sse.md that are written for
32928 SEL+CONCAT and are not replicated for a single op. Perhaps
32929 that should be changed, to avoid the nastiness here. */
32931 /* Recognize interleave style patterns, which means incrementing
32932 every other permutation operand. */
32933 for (i = 0; i < nelt; i += 2)
32935 perm2[i] = d->perm[i] & mask;
32936 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
32938 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32939 return true;
32941 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
32942 if (nelt >= 4)
32944 for (i = 0; i < nelt; i += 4)
32946 perm2[i + 0] = d->perm[i + 0] & mask;
32947 perm2[i + 1] = d->perm[i + 1] & mask;
32948 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
32949 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
32952 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32953 return true;
32957 /* Finally, try the fully general two operand permute. */
32958 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
32959 return true;
32961 /* Recognize interleave style patterns with reversed operands. */
32962 if (d->op0 != d->op1)
32964 for (i = 0; i < nelt; ++i)
32966 unsigned e = d->perm[i];
32967 if (e >= nelt)
32968 e -= nelt;
32969 else
32970 e += nelt;
32971 perm2[i] = e;
32974 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
32975 return true;
32978 /* Try the SSE4.1 blend variable merge instructions. */
32979 if (expand_vec_perm_blend (d))
32980 return true;
32982 /* Try one of the AVX vpermil variable permutations. */
32983 if (expand_vec_perm_vpermil (d))
32984 return true;
32986 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
32987 if (expand_vec_perm_pshufb (d))
32988 return true;
32990 return false;
32993 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32994 in terms of a pair of pshuflw + pshufhw instructions. */
32996 static bool
32997 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
32999 unsigned char perm2[MAX_VECT_LEN];
33000 unsigned i;
33001 bool ok;
33003 if (d->vmode != V8HImode || d->op0 != d->op1)
33004 return false;
33006 /* The two permutations only operate in 64-bit lanes. */
33007 for (i = 0; i < 4; ++i)
33008 if (d->perm[i] >= 4)
33009 return false;
33010 for (i = 4; i < 8; ++i)
33011 if (d->perm[i] < 4)
33012 return false;
33014 if (d->testing_p)
33015 return true;
33017 /* Emit the pshuflw. */
33018 memcpy (perm2, d->perm, 4);
33019 for (i = 4; i < 8; ++i)
33020 perm2[i] = i;
33021 ok = expand_vselect (d->target, d->op0, perm2, 8);
33022 gcc_assert (ok);
33024 /* Emit the pshufhw. */
33025 memcpy (perm2 + 4, d->perm + 4, 4);
33026 for (i = 0; i < 4; ++i)
33027 perm2[i] = i;
33028 ok = expand_vselect (d->target, d->target, perm2, 8);
33029 gcc_assert (ok);
33031 return true;
33034 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33035 the permutation using the SSSE3 palignr instruction. This succeeds
33036 when all of the elements in PERM fit within one vector and we merely
33037 need to shift them down so that a single vector permutation has a
33038 chance to succeed. */
33040 static bool
33041 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
33043 unsigned i, nelt = d->nelt;
33044 unsigned min, max;
33045 bool in_order, ok;
33046 rtx shift;
33048 /* Even with AVX, palignr only operates on 128-bit vectors. */
33049 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33050 return false;
33052 min = nelt, max = 0;
33053 for (i = 0; i < nelt; ++i)
33055 unsigned e = d->perm[i];
33056 if (e < min)
33057 min = e;
33058 if (e > max)
33059 max = e;
33061 if (min == 0 || max - min >= nelt)
33062 return false;
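/* The rejection above covers the cases where no rotation helps: a zero
   shift would leave the two-operand permutation unchanged (and
   expand_vec_perm_1 has already been tried), and a spread of NELT or more
   elements cannot be brought into a single vector by any rotation.  */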
33064 /* Given that we have SSSE3, we know we'll be able to implement the
33065 single operand permutation after the palignr with pshufb. */
33066 if (d->testing_p)
33067 return true;
33069 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
33070 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
33071 gen_lowpart (TImode, d->op1),
33072 gen_lowpart (TImode, d->op0), shift));
33074 d->op0 = d->op1 = d->target;
33076 in_order = true;
33077 for (i = 0; i < nelt; ++i)
33079 unsigned e = d->perm[i] - min;
33080 if (e != i)
33081 in_order = false;
33082 d->perm[i] = e;
33085 /* Test for the degenerate case where the alignment by itself
33086 produces the desired permutation. */
33087 if (in_order)
33088 return true;
33090 ok = expand_vec_perm_1 (d);
33091 gcc_assert (ok);
33093 return ok;
33096 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33097 a two vector permutation into a single vector permutation by using
33098 an interleave operation to merge the vectors. */
33100 static bool
33101 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
33103 struct expand_vec_perm_d dremap, dfinal;
33104 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
33105 unsigned contents, h1, h2, h3, h4;
33106 unsigned char remap[2 * MAX_VECT_LEN];
33107 rtx seq;
33108 bool ok;
33110 if (d->op0 == d->op1)
33111 return false;
33113 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
33114 lanes. We can use similar techniques with the vperm2f128 instruction,
33115 but it requires slightly different logic. */
33116 if (GET_MODE_SIZE (d->vmode) != 16)
33117 return false;
33119 /* Examine from whence the elements come. */
33120 contents = 0;
33121 for (i = 0; i < nelt; ++i)
33122 contents |= 1u << d->perm[i];
33124 /* Split the two input vectors into 4 halves. */
33125 h1 = (1u << nelt2) - 1;
33126 h2 = h1 << nelt2;
33127 h3 = h2 << nelt2;
33128 h4 = h3 << nelt2;
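/* As bit masks over CONTENTS: h1 covers the low half of op0, h2 its high
   half, h3 the low half of op1 and h4 its high half.  */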
33130 memset (remap, 0xff, sizeof (remap));
33131 dremap = *d;
33133 /* If all elements come from the two low halves, use interleave low;
33134 similarly, use interleave high when all come from the high halves. If the
33135 elements are from mis-matched halves, we can use shufps for V4SF/V4SI
or do a DImode shuffle. */
33136 if ((contents & (h1 | h3)) == contents)
33138 for (i = 0; i < nelt2; ++i)
33140 remap[i] = i * 2;
33141 remap[i + nelt] = i * 2 + 1;
33142 dremap.perm[i * 2] = i;
33143 dremap.perm[i * 2 + 1] = i + nelt;
33146 else if ((contents & (h2 | h4)) == contents)
33148 for (i = 0; i < nelt2; ++i)
33150 remap[i + nelt2] = i * 2;
33151 remap[i + nelt + nelt2] = i * 2 + 1;
33152 dremap.perm[i * 2] = i + nelt2;
33153 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
33156 else if ((contents & (h1 | h4)) == contents)
33158 for (i = 0; i < nelt2; ++i)
33160 remap[i] = i;
33161 remap[i + nelt + nelt2] = i + nelt2;
33162 dremap.perm[i] = i;
33163 dremap.perm[i + nelt2] = i + nelt + nelt2;
33165 if (nelt != 4)
33167 dremap.vmode = V2DImode;
33168 dremap.nelt = 2;
33169 dremap.perm[0] = 0;
33170 dremap.perm[1] = 3;
33173 else if ((contents & (h2 | h3)) == contents)
33175 for (i = 0; i < nelt2; ++i)
33177 remap[i + nelt2] = i;
33178 remap[i + nelt] = i + nelt2;
33179 dremap.perm[i] = i + nelt2;
33180 dremap.perm[i + nelt2] = i + nelt;
33182 if (nelt != 4)
33184 dremap.vmode = V2DImode;
33185 dremap.nelt = 2;
33186 dremap.perm[0] = 1;
33187 dremap.perm[1] = 2;
33190 else
33191 return false;
33193 /* Use the remapping array set up above to move the elements from their
33194 swizzled locations into their final destinations. */
33195 dfinal = *d;
33196 for (i = 0; i < nelt; ++i)
33198 unsigned e = remap[d->perm[i]];
33199 gcc_assert (e < nelt);
33200 dfinal.perm[i] = e;
33202 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
33203 dfinal.op1 = dfinal.op0;
33204 dremap.target = dfinal.op0;
33206 /* Test if the final remap can be done with a single insn. For V4SFmode or
33207 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
33208 start_sequence ();
33209 ok = expand_vec_perm_1 (&dfinal);
33210 seq = get_insns ();
33211 end_sequence ();
33213 if (!ok)
33214 return false;
33216 if (dremap.vmode != dfinal.vmode)
33218 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
33219 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
33220 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
33223 ok = expand_vec_perm_1 (&dremap);
33224 gcc_assert (ok);
33226 emit_insn (seq);
33227 return true;
33230 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
33231 permutation with two pshufb insns and an ior. We should have already
33232 failed all two-instruction sequences. */
33234 static bool
33235 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
33237 rtx rperm[2][16], vperm, l, h, op, m128;
33238 unsigned int i, nelt, eltsz;
33240 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33241 return false;
33242 gcc_assert (d->op0 != d->op1);
33244 nelt = d->nelt;
33245 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33247 /* Generate two permutation masks. If the required element is within
33248 the given vector it is shuffled into the proper lane. If the required
33249 element is in the other vector, force a zero into the lane by setting
33250 bit 7 in the permutation mask. */
33251 m128 = GEN_INT (-128);
33252 for (i = 0; i < nelt; ++i)
33254 unsigned j, e = d->perm[i];
33255 unsigned which = (e >= nelt);
33256 if (e >= nelt)
33257 e -= nelt;
33259 for (j = 0; j < eltsz; ++j)
33261 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
33262 rperm[1-which][i*eltsz + j] = m128;
33266 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
33267 vperm = force_reg (V16QImode, vperm);
33269 l = gen_reg_rtx (V16QImode);
33270 op = gen_lowpart (V16QImode, d->op0);
33271 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
33273 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
33274 vperm = force_reg (V16QImode, vperm);
33276 h = gen_reg_rtx (V16QImode);
33277 op = gen_lowpart (V16QImode, d->op1);
33278 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
33280 op = gen_lowpart (V16QImode, d->target);
33281 emit_insn (gen_iorv16qi3 (op, l, h));
33283 return true;
33286 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
33287 and extract-odd permutations. */
33289 static bool
33290 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
33292 rtx t1, t2, t3;
33294 switch (d->vmode)
33296 case V4DFmode:
33297 t1 = gen_reg_rtx (V4DFmode);
33298 t2 = gen_reg_rtx (V4DFmode);
33300 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
33301 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
33302 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
33304 /* Now an unpck[lh]pd will produce the result required. */
33305 if (odd)
33306 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
33307 else
33308 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
33309 emit_insn (t3);
33310 break;
33312 case V8SFmode:
33314 int mask = odd ? 0xdd : 0x88;
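/* shufps selects two elements from each source per 128-bit lane: 0x88
   picks elements {0, 2} (the even ones), 0xdd picks {1, 3} (the odd
   ones).  */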
33316 t1 = gen_reg_rtx (V8SFmode);
33317 t2 = gen_reg_rtx (V8SFmode);
33318 t3 = gen_reg_rtx (V8SFmode);
33320 /* Shuffle within the 128-bit lanes to produce:
33321 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
33322 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
33323 GEN_INT (mask)));
33325 /* Shuffle the lanes around to produce:
33326 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
33327 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
33328 GEN_INT (0x3)));
33330 /* Shuffle within the 128-bit lanes to produce:
33331 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
33332 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
33334 /* Shuffle within the 128-bit lanes to produce:
33335 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
33336 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
33338 /* Shuffle the lanes around to produce:
33339 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
33340 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
33341 GEN_INT (0x20)));
33343 break;
33345 case V2DFmode:
33346 case V4SFmode:
33347 case V2DImode:
33348 case V4SImode:
33349 /* These are always directly implementable by expand_vec_perm_1. */
33350 gcc_unreachable ();
33352 case V8HImode:
33353 if (TARGET_SSSE3)
33354 return expand_vec_perm_pshufb2 (d);
33355 else
33357 /* We need 2*log2(N)-1 operations to achieve odd/even
33358 with interleave. */
33359 t1 = gen_reg_rtx (V8HImode);
33360 t2 = gen_reg_rtx (V8HImode);
33361 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
33362 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
33363 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
33364 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
33365 if (odd)
33366 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
33367 else
33368 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
33369 emit_insn (t3);
33371 break;
33373 case V16QImode:
33374 if (TARGET_SSSE3)
33375 return expand_vec_perm_pshufb2 (d);
33376 else
33378 t1 = gen_reg_rtx (V16QImode);
33379 t2 = gen_reg_rtx (V16QImode);
33380 t3 = gen_reg_rtx (V16QImode);
33381 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
33382 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
33383 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
33384 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
33385 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
33386 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
33387 if (odd)
33388 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
33389 else
33390 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
33391 emit_insn (t3);
33393 break;
33395 default:
33396 gcc_unreachable ();
33399 return true;
33402 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33403 extract-even and extract-odd permutations. */
33405 static bool
33406 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
33408 unsigned i, odd, nelt = d->nelt;
33410 odd = d->perm[0];
33411 if (odd != 0 && odd != 1)
33412 return false;
33414 for (i = 1; i < nelt; ++i)
33415 if (d->perm[i] != 2 * i + odd)
33416 return false;
33418 return expand_vec_perm_even_odd_1 (d, odd);
33421 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
33422 permutations. We assume that expand_vec_perm_1 has already failed. */
33424 static bool
33425 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
33427 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
33428 enum machine_mode vmode = d->vmode;
33429 unsigned char perm2[4];
33430 rtx op0 = d->op0;
33431 bool ok;
33433 switch (vmode)
33435 case V4DFmode:
33436 case V8SFmode:
33437 /* These are special-cased in sse.md so that we can optionally
33438 use the vbroadcast instruction. They expand to two insns
33439 if the input happens to be in a register. */
33440 gcc_unreachable ();
33442 case V2DFmode:
33443 case V2DImode:
33444 case V4SFmode:
33445 case V4SImode:
33446 /* These are always implementable using standard shuffle patterns. */
33447 gcc_unreachable ();
33449 case V8HImode:
33450 case V16QImode:
33451 /* These can be implemented via interleave. We save one insn by
33452 stopping once we have promoted to V4SImode and then use pshufd. */
33455 optab otab = vec_interleave_low_optab;
33457 if (elt >= nelt2)
33459 otab = vec_interleave_high_optab;
33460 elt -= nelt2;
33462 nelt2 /= 2;
33464 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
33465 vmode = get_mode_wider_vector (vmode);
33466 op0 = gen_lowpart (vmode, op0);
33468 while (vmode != V4SImode);
33470 memset (perm2, elt, 4);
33471 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
33472 gcc_assert (ok);
33473 return true;
33475 default:
33476 gcc_unreachable ();
33480 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33481 broadcast permutations. */
33483 static bool
33484 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
33486 unsigned i, elt, nelt = d->nelt;
33488 if (d->op0 != d->op1)
33489 return false;
33491 elt = d->perm[0];
33492 for (i = 1; i < nelt; ++i)
33493 if (d->perm[i] != elt)
33494 return false;
33496 return expand_vec_perm_broadcast_1 (d);
33499 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
33500 With all of the interface bits taken care of, perform the expansion
33501 in D and return true on success. */
33503 static bool
33504 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
33506 /* Try a single instruction expansion. */
33507 if (expand_vec_perm_1 (d))
33508 return true;
33510 /* Try sequences of two instructions. */
33512 if (expand_vec_perm_pshuflw_pshufhw (d))
33513 return true;
33515 if (expand_vec_perm_palignr (d))
33516 return true;
33518 if (expand_vec_perm_interleave2 (d))
33519 return true;
33521 if (expand_vec_perm_broadcast (d))
33522 return true;
33524 /* Try sequences of three instructions. */
33526 if (expand_vec_perm_pshufb2 (d))
33527 return true;
33529 /* ??? Look for narrow permutations whose element orderings would
33530 allow the promotion to a wider mode. */
33532 /* ??? Look for sequences of interleave or a wider permute that place
33533 the data into the correct lanes for a half-vector shuffle like
33534 pshuf[lh]w or vpermilps. */
33536 /* ??? Look for sequences of interleave that produce the desired results.
33537 The combinatorics of punpck[lh] get pretty ugly... */
33539 if (expand_vec_perm_even_odd (d))
33540 return true;
33542 return false;
33545 /* Extract the values from the vector CST into the permutation array in D.
33546 Return 0 on error, 1 if all values from the permutation come from the
33547 first vector, 2 if all values from the second vector, and 3 otherwise. */
33549 static int
33550 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
33552 tree list = TREE_VECTOR_CST_ELTS (cst);
33553 unsigned i, nelt = d->nelt;
33554 int ret = 0;
33556 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
33558 unsigned HOST_WIDE_INT e;
33560 if (!host_integerp (TREE_VALUE (list), 1))
33561 return 0;
33562 e = tree_low_cst (TREE_VALUE (list), 1);
33563 if (e >= 2 * nelt)
33564 return 0;
33566 ret |= (e < nelt ? 1 : 2);
33567 d->perm[i] = e;
33569 gcc_assert (list == NULL);
33571 /* For all elements from second vector, fold the elements to first. */
33572 if (ret == 2)
33573 for (i = 0; i < nelt; ++i)
33574 d->perm[i] -= nelt;
33576 return ret;
33579 static rtx
33580 ix86_expand_vec_perm_builtin (tree exp)
33582 struct expand_vec_perm_d d;
33583 tree arg0, arg1, arg2;
33585 arg0 = CALL_EXPR_ARG (exp, 0);
33586 arg1 = CALL_EXPR_ARG (exp, 1);
33587 arg2 = CALL_EXPR_ARG (exp, 2);
33589 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
33590 d.nelt = GET_MODE_NUNITS (d.vmode);
33591 d.testing_p = false;
33592 gcc_assert (VECTOR_MODE_P (d.vmode));
33594 if (TREE_CODE (arg2) != VECTOR_CST)
33596 error_at (EXPR_LOCATION (exp),
33597 "vector permutation requires vector constant");
33598 goto exit_error;
33601 switch (extract_vec_perm_cst (&d, arg2))
33603 default:
33604 gcc_unreachable();
33606 case 0:
33607 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
33608 goto exit_error;
33610 case 3:
33611 if (!operand_equal_p (arg0, arg1, 0))
33613 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33614 d.op0 = force_reg (d.vmode, d.op0);
33615 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33616 d.op1 = force_reg (d.vmode, d.op1);
33617 break;
33620 /* The elements of PERM do not suggest that only the first operand
33621 is used, but both operands are identical. Allow easier matching
33622 of the permutation by folding the permutation into the single
33623 input vector. */
33625 unsigned i, nelt = d.nelt;
33626 for (i = 0; i < nelt; ++i)
33627 if (d.perm[i] >= nelt)
33628 d.perm[i] -= nelt;
33630 /* FALLTHRU */
33632 case 1:
33633 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33634 d.op0 = force_reg (d.vmode, d.op0);
33635 d.op1 = d.op0;
33636 break;
33638 case 2:
33639 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33640 d.op0 = force_reg (d.vmode, d.op0);
33641 d.op1 = d.op0;
33642 break;
33645 d.target = gen_reg_rtx (d.vmode);
33646 if (ix86_expand_vec_perm_builtin_1 (&d))
33647 return d.target;
33649 /* For compiler generated permutations, we should never get here, because
33650 the compiler should also be checking the ok hook. But since this is a
33651 builtin the user has access to, don't abort. */
33652 switch (d.nelt)
33654 case 2:
33655 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
33656 break;
33657 case 4:
33658 sorry ("vector permutation (%d %d %d %d)",
33659 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
33660 break;
33661 case 8:
33662 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
33663 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33664 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
33665 break;
33666 case 16:
33667 sorry ("vector permutation "
33668 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
33669 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33670 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
33671 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
33672 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
33673 break;
33674 default:
33675 gcc_unreachable ();
33677 exit_error:
33678 return CONST0_RTX (d.vmode);
33681 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
33683 static bool
33684 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
33686 struct expand_vec_perm_d d;
33687 int vec_mask;
33688 bool ret, one_vec;
33690 d.vmode = TYPE_MODE (vec_type);
33691 d.nelt = GET_MODE_NUNITS (d.vmode);
33692 d.testing_p = true;
33694 /* Given sufficient ISA support we can just return true here
33695 for selected vector modes. */
33696 if (GET_MODE_SIZE (d.vmode) == 16)
33698 /* All implementable with a single vpperm insn. */
33699 if (TARGET_XOP)
33700 return true;
33701 /* All implementable with 2 pshufb + 1 ior. */
33702 if (TARGET_SSSE3)
33703 return true;
33704 /* All implementable with shufpd or unpck[lh]pd. */
33705 if (d.nelt == 2)
33706 return true;
33709 vec_mask = extract_vec_perm_cst (&d, mask);
33711 /* This hook cannot be called in response to something that the
33712 user does (unlike the builtin expander) so we shouldn't ever see
33713 an error generated from the extract. */
33714 gcc_assert (vec_mask > 0 && vec_mask <= 3);
33715 one_vec = (vec_mask != 3);
33717 /* Implementable with shufps or pshufd. */
33718 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
33719 return true;
33721 /* Otherwise we have to go through the motions and see if we can
33722 figure out how to generate the requested permutation. */
33723 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
33724 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
33725 if (!one_vec)
33726 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
33728 start_sequence ();
33729 ret = ix86_expand_vec_perm_builtin_1 (&d);
33730 end_sequence ();
33732 return ret;
33735 void
33736 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
33738 struct expand_vec_perm_d d;
33739 unsigned i, nelt;
33741 d.target = targ;
33742 d.op0 = op0;
33743 d.op1 = op1;
33744 d.vmode = GET_MODE (targ);
33745 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
33746 d.testing_p = false;
33748 for (i = 0; i < nelt; ++i)
33749 d.perm[i] = i * 2 + odd;
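/* This selects every second element of the concatenated { op0, op1 },
   starting at ODD: { odd, odd+2, odd+4, ... }.  */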
33751 /* We'll either be able to implement the permutation directly... */
33752 if (expand_vec_perm_1 (&d))
33753 return;
33755 /* ... or we use the special-case patterns. */
33756 expand_vec_perm_even_odd_1 (&d, odd);
33759 /* This function returns the calling-ABI-specific va_list type node,
33760 i.e. the va_list type appropriate for FNDECL. */
33762 static tree
33763 ix86_fn_abi_va_list (tree fndecl)
33765 if (!TARGET_64BIT)
33766 return va_list_type_node;
33767 gcc_assert (fndecl != NULL_TREE);
33769 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
33770 return ms_va_list_type_node;
33771 else
33772 return sysv_va_list_type_node;
33775 /* Returns the canonical va_list type specified by TYPE. If there
33776 is no valid TYPE provided, it returns NULL_TREE. */
33778 static tree
33779 ix86_canonical_va_list_type (tree type)
33781 tree wtype, htype;
33783 /* Resolve references and pointers to va_list type. */
33784 if (TREE_CODE (type) == MEM_REF)
33785 type = TREE_TYPE (type);
33786 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
33787 type = TREE_TYPE (type);
33788 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
33789 type = TREE_TYPE (type);
33791 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
33793 wtype = va_list_type_node;
33794 gcc_assert (wtype != NULL_TREE);
33795 htype = type;
33796 if (TREE_CODE (wtype) == ARRAY_TYPE)
33798 /* If va_list is an array type, the argument may have decayed
33799 to a pointer type, e.g. by being passed to another function.
33800 In that case, unwrap both types so that we can compare the
33801 underlying records. */
33802 if (TREE_CODE (htype) == ARRAY_TYPE
33803 || POINTER_TYPE_P (htype))
33805 wtype = TREE_TYPE (wtype);
33806 htype = TREE_TYPE (htype);
33809 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33810 return va_list_type_node;
33811 wtype = sysv_va_list_type_node;
33812 gcc_assert (wtype != NULL_TREE);
33813 htype = type;
33814 if (TREE_CODE (wtype) == ARRAY_TYPE)
33816 /* If va_list is an array type, the argument may have decayed
33817 to a pointer type, e.g. by being passed to another function.
33818 In that case, unwrap both types so that we can compare the
33819 underlying records. */
33820 if (TREE_CODE (htype) == ARRAY_TYPE
33821 || POINTER_TYPE_P (htype))
33823 wtype = TREE_TYPE (wtype);
33824 htype = TREE_TYPE (htype);
33827 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33828 return sysv_va_list_type_node;
33829 wtype = ms_va_list_type_node;
33830 gcc_assert (wtype != NULL_TREE);
33831 htype = type;
33832 if (TREE_CODE (wtype) == ARRAY_TYPE)
33834 /* If va_list is an array type, the argument may have decayed
33835 to a pointer type, e.g. by being passed to another function.
33836 In that case, unwrap both types so that we can compare the
33837 underlying records. */
33838 if (TREE_CODE (htype) == ARRAY_TYPE
33839 || POINTER_TYPE_P (htype))
33841 wtype = TREE_TYPE (wtype);
33842 htype = TREE_TYPE (htype);
33845 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33846 return ms_va_list_type_node;
33847 return NULL_TREE;
33849 return std_canonical_va_list_type (type);
33852 /* Iterate through the target-specific builtin types for va_list.
33853 IDX denotes the iterator, *PTREE is set to the result type of
33854 the va_list builtin, and *PNAME to its internal type.
33855 Returns zero if there is no element for this index, otherwise
33856 IDX should be increased upon the next call.
33857 Note, do not iterate a base builtin's name like __builtin_va_list.
33858 Used from c_common_nodes_and_builtins. */
33860 static int
33861 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
33863 if (TARGET_64BIT)
33865 switch (idx)
33867 default:
33868 break;
33870 case 0:
33871 *ptree = ms_va_list_type_node;
33872 *pname = "__builtin_ms_va_list";
33873 return 1;
33875 case 1:
33876 *ptree = sysv_va_list_type_node;
33877 *pname = "__builtin_sysv_va_list";
33878 return 1;
33882 return 0;
33885 #undef TARGET_SCHED_DISPATCH
33886 #define TARGET_SCHED_DISPATCH has_dispatch
33887 #undef TARGET_SCHED_DISPATCH_DO
33888 #define TARGET_SCHED_DISPATCH_DO do_dispatch
33890 /* The size of the dispatch window is the total number of bytes of
33891 object code allowed in a window. */
33892 #define DISPATCH_WINDOW_SIZE 16
33894 /* Number of dispatch windows considered for scheduling. */
33895 #define MAX_DISPATCH_WINDOWS 3
33897 /* Maximum number of instructions in a window. */
33898 #define MAX_INSN 4
33900 /* Maximum number of immediate operands in a window. */
33901 #define MAX_IMM 4
33903 /* Maximum number of immediate bits allowed in a window. */
33904 #define MAX_IMM_SIZE 128
33906 /* Maximum number of 32 bit immediates allowed in a window. */
33907 #define MAX_IMM_32 4
33909 /* Maximum number of 64 bit immediates allowed in a window. */
33910 #define MAX_IMM_64 2
33912 /* Maximum total of loads or prefetches allowed in a window. */
33913 #define MAX_LOAD 2
33915 /* Maximum total of stores allowed in a window. */
33916 #define MAX_STORE 1
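/* These limits appear to model the dispatch hardware of the AMD Bulldozer
   family, the only consumer of the dispatch scheduler below; treat the
   exact figures as target tuning constants rather than architectural
   facts.  */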
33918 #undef BIG
33919 #define BIG 100
33922 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
33923 enum dispatch_group {
33924 disp_no_group = 0,
33925 disp_load,
33926 disp_store,
33927 disp_load_store,
33928 disp_prefetch,
33929 disp_imm,
33930 disp_imm_32,
33931 disp_imm_64,
33932 disp_branch,
33933 disp_cmp,
33934 disp_jcc,
33935 disp_last
33938 /* Number of allowable groups in a dispatch window. It is an array
33939 indexed by dispatch_group enum. 100 is used as a big number,
33940 because the number of these kinds of operations does not have any
33941 effect in a dispatch window, but we need entries for them for other
33942 reasons in the table. */
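/* In enum order the limits are: no_group 0, load 2, store 1, load_store 1,
   prefetch 2, imm 4, imm_32 4, imm_64 2, branch 1, cmp BIG, jcc BIG --
   mirroring the MAX_* limits above, with compares and jccs effectively
   unlimited.  */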
33943 static unsigned int num_allowable_groups[disp_last] = {
33944 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
33947 char group_name[disp_last + 1][16] = {
33948 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
33949 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
33950 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
33953 /* Instruction path. */
33954 enum insn_path {
33955 no_path = 0,
33956 path_single, /* Single micro op. */
33957 path_double, /* Double micro op. */
33958 path_multi, /* Instructions with more than 2 micro ops. */
33959 last_path
33962 /* sched_insn_info defines a window to the instructions scheduled in
33963 the basic block. It contains a pointer to the insn_info table and
33964 the instruction scheduled.
33966 Windows are allocated for each basic block and are linked
33967 together. */
33968 typedef struct sched_insn_info_s {
33969 rtx insn;
33970 enum dispatch_group group;
33971 enum insn_path path;
33972 int byte_len;
33973 int imm_bytes;
33974 } sched_insn_info;
33976 /* Linked list of dispatch windows. This is a two-way list of
33977 dispatch windows of a basic block. It contains information about
33978 the number of uops in the window and the total number of
33979 instructions and of bytes in the object code for this dispatch
33980 window. */
33981 typedef struct dispatch_windows_s {
33982 int num_insn; /* Number of insn in the window. */
33983 int num_uops; /* Number of uops in the window. */
33984 int window_size; /* Number of bytes in the window. */
33985 int window_num; /* Window number, either 0 or 1. */
33986 int num_imm; /* Number of immediates in an insn. */
33987 int num_imm_32; /* Number of 32 bit immediates in an insn. */
33988 int num_imm_64; /* Number of 64 bit immediates in an insn. */
33989 int imm_size; /* Total immediates in the window. */
33990 int num_loads; /* Total memory loads in the window. */
33991 int num_stores; /* Total memory stores in the window. */
33992 int violation; /* Violation exists in window. */
33993 sched_insn_info *window; /* Pointer to the window. */
33994 struct dispatch_windows_s *next;
33995 struct dispatch_windows_s *prev;
33996 } dispatch_windows;
33998 /* Immediate values used in an insn. */
33999 typedef struct imm_info_s
34001 int imm;
34002 int imm32;
34003 int imm64;
34004 } imm_info;
34006 static dispatch_windows *dispatch_window_list;
34007 static dispatch_windows *dispatch_window_list1;
34009 /* Get dispatch group of insn. */
34011 static enum dispatch_group
34012 get_mem_group (rtx insn)
34014 enum attr_memory memory;
34016 if (INSN_CODE (insn) < 0)
34017 return disp_no_group;
34018 memory = get_attr_memory (insn);
34019 if (memory == MEMORY_STORE)
34020 return disp_store;
34022 if (memory == MEMORY_LOAD)
34023 return disp_load;
34025 if (memory == MEMORY_BOTH)
34026 return disp_load_store;
34028 return disp_no_group;
34031 /* Return true if insn is a compare instruction. */
34033 static bool
34034 is_cmp (rtx insn)
34036 enum attr_type type;
34038 type = get_attr_type (insn);
34039 return (type == TYPE_TEST
34040 || type == TYPE_ICMP
34041 || type == TYPE_FCMP
34042 || GET_CODE (PATTERN (insn)) == COMPARE);
34045 /* Return true if a dispatch violation was encountered. */
34047 static bool
34048 dispatch_violation (void)
34050 if (dispatch_window_list->next)
34051 return dispatch_window_list->next->violation;
34052 return dispatch_window_list->violation;
34055 /* Return true if insn is a branch instruction. */
34057 static bool
34058 is_branch (rtx insn)
34060 return (CALL_P (insn) || JUMP_P (insn));
34063 /* Return true if insn is a prefetch instruction. */
34065 static bool
34066 is_prefetch (rtx insn)
34068 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
34071 /* This function initializes a dispatch window and the list container holding a
34072 pointer to the window. */
34074 static void
34075 init_window (int window_num)
34077 int i;
34078 dispatch_windows *new_list;
34080 if (window_num == 0)
34081 new_list = dispatch_window_list;
34082 else
34083 new_list = dispatch_window_list1;
34085 new_list->num_insn = 0;
34086 new_list->num_uops = 0;
34087 new_list->window_size = 0;
34088 new_list->next = NULL;
34089 new_list->prev = NULL;
34090 new_list->window_num = window_num;
34091 new_list->num_imm = 0;
34092 new_list->num_imm_32 = 0;
34093 new_list->num_imm_64 = 0;
34094 new_list->imm_size = 0;
34095 new_list->num_loads = 0;
34096 new_list->num_stores = 0;
34097 new_list->violation = false;
34099 for (i = 0; i < MAX_INSN; i++)
34101 new_list->window[i].insn = NULL;
34102 new_list->window[i].group = disp_no_group;
34103 new_list->window[i].path = no_path;
34104 new_list->window[i].byte_len = 0;
34105 new_list->window[i].imm_bytes = 0;
34107 return;
34110 /* This function allocates and initializes a dispatch window and the
34111 list container holding a pointer to the window. */
34113 static dispatch_windows *
34114 allocate_window (void)
34116 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
34117 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
34119 return new_list;
34122 /* This routine initializes the dispatch scheduling information. It
34123 initiates building dispatch scheduler tables and constructs the
34124 first dispatch window. */
34126 static void
34127 init_dispatch_sched (void)
34129 /* Allocate a dispatch list and a window. */
34130 dispatch_window_list = allocate_window ();
34131 dispatch_window_list1 = allocate_window ();
34132 init_window (0);
34133 init_window (1);
34136 /* This function returns true if a branch is detected.  The end of a
34137    basic block does not have to be a branch, but here we assume that
34138    only branches end a window. */
34140 static bool
34141 is_end_basic_block (enum dispatch_group group)
34143 return group == disp_branch;
34146 /* This function is called when the end of a window's processing is reached. */
34148 static void
34149 process_end_window (void)
34151 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
34152 if (dispatch_window_list->next)
34154 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
34155 gcc_assert (dispatch_window_list->window_size
34156 + dispatch_window_list1->window_size <= 48);
34157 init_window (1);
34159 init_window (0);
34162 /* Allocate a new dispatch window and add it to WINDOW_LIST.
34163    WINDOW_NUM is either 0 or 1.  A maximum of two windows are
34164    generated for 48 bytes of instructions.  Note that these are not
34165    the dispatch windows whose size is DISPATCH_WINDOW_SIZE. */
34167 static dispatch_windows *
34168 allocate_next_window (int window_num)
34170 if (window_num == 0)
34172 if (dispatch_window_list->next)
34173 init_window (1);
34174 init_window (0);
34175 return dispatch_window_list;
34178 dispatch_window_list->next = dispatch_window_list1;
34179 dispatch_window_list1->prev = dispatch_window_list;
34181 return dispatch_window_list1;
34184 /* Subroutine of find_constant.  Increment the counts in IMM_VALUES for each immediate operand found at *IN_RTX. */
34186 static int
34187 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
34189 if (*in_rtx == 0)
34190 return 0;
34192 switch (GET_CODE (*in_rtx))
34194 case CONST:
34195 case SYMBOL_REF:
34196 case CONST_INT:
34197 (imm_values->imm)++;
34198 if (x86_64_immediate_operand (*in_rtx, SImode))
34199 (imm_values->imm32)++;
34200 else
34201 (imm_values->imm64)++;
34202 break;
34204 case CONST_DOUBLE:
34205 (imm_values->imm)++;
34206 (imm_values->imm64)++;
34207 break;
34209 case CODE_LABEL:
34210 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
34212 (imm_values->imm)++;
34213 (imm_values->imm32)++;
34215 break;
34217 default:
34218 break;
34221 return 0;
34224 /* Compute number of immediate operands of an instruction. */
34226 static void
34227 find_constant (rtx in_rtx, imm_info *imm_values)
34229 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
34230 (rtx_function) find_constant_1, (void *) imm_values);
34233 /* Return the total size of the immediate operands of an instruction
34234    along with the number of corresponding immediate operands.  It
34235    initializes its parameters to zero before calling FIND_CONSTANT.
34236    INSN is the input instruction.  IMM is the total of immediates.
34237    IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
34238    bit immediates. */
34240 static int
34241 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
34243 imm_info imm_values = {0, 0, 0};
34245 find_constant (insn, &imm_values);
34246 *imm = imm_values.imm;
34247 *imm32 = imm_values.imm32;
34248 *imm64 = imm_values.imm64;
34249 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
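/* Worked example (illustration only): an insn carrying one 32-bit and
   one 64-bit immediate yields *IMM = 2, *IMM32 = 1, *IMM64 = 1 and a
   return value of 1*4 + 1*8 = 12 bytes of immediate data.  */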
34252 /* Return true if INSN has at least one immediate operand. */
34255 static bool
34256 has_immediate (rtx insn)
34258 int num_imm_operand;
34259 int num_imm32_operand;
34260 int num_imm64_operand;
34262 if (insn)
34263 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34264 &num_imm64_operand);
34265 return false;
34268 /* Return the decode path (single, double or multi) of an instruction. */
34270 static enum insn_path
34271 get_insn_path (rtx insn)
34273 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
34275 if ((int)path == 0)
34276 return path_single;
34278 if ((int)path == 1)
34279 return path_double;
34281 return path_multi;
34284 /* Return insn dispatch group. */
34286 static enum dispatch_group
34287 get_insn_group (rtx insn)
34289 enum dispatch_group group = get_mem_group (insn);
34290 if (group)
34291 return group;
34293 if (is_branch (insn))
34294 return disp_branch;
34296 if (is_cmp (insn))
34297 return disp_cmp;
34299 if (has_immediate (insn))
34300 return disp_imm;
34302 if (is_prefetch (insn))
34303 return disp_prefetch;
34305 return disp_no_group;
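/* Note the classification priority above: memory groups are checked
   first, then branch, compare, immediate and finally prefetch.  */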
34308 /* Count the number of GROUP-restricted instructions in the dispatch
34309    window WINDOW_LIST. */
34311 static int
34312 count_num_restricted (rtx insn, dispatch_windows *window_list)
34314 enum dispatch_group group = get_insn_group (insn);
34315 int imm_size;
34316 int num_imm_operand;
34317 int num_imm32_operand;
34318 int num_imm64_operand;
34320 if (group == disp_no_group)
34321 return 0;
34323 if (group == disp_imm)
34325 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34326 &num_imm64_operand);
34327 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
34328 || num_imm_operand + window_list->num_imm > MAX_IMM
34329 || (num_imm32_operand > 0
34330 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
34331 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
34332 || (num_imm64_operand > 0
34333 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
34334 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
34335 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
34336 && num_imm64_operand > 0
34337 && ((window_list->num_imm_64 > 0
34338 && window_list->num_insn >= 2)
34339 || window_list->num_insn >= 3)))
34340 return BIG;
34342 return 1;
34345 if ((group == disp_load_store
34346 && (window_list->num_loads >= MAX_LOAD
34347 || window_list->num_stores >= MAX_STORE))
34348 || ((group == disp_load
34349 || group == disp_prefetch)
34350 && window_list->num_loads >= MAX_LOAD)
34351 || (group == disp_store
34352 && window_list->num_stores >= MAX_STORE))
34353 return BIG;
34355 return 1;
34358 /* Return true if INSN satisfies the dispatch rules on the last
34359    window scheduled. */
34361 static bool
34362 fits_dispatch_window (rtx insn)
34364 dispatch_windows *window_list = dispatch_window_list;
34365 dispatch_windows *window_list_next = dispatch_window_list->next;
34366 unsigned int num_restrict;
34367 enum dispatch_group group = get_insn_group (insn);
34368 enum insn_path path = get_insn_path (insn);
34369 int sum;
34371 /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
34372    instructions should be given the lowest priority in the
34373    scheduling process in the Haifa scheduler to make sure they will be
34374    scheduled in the same dispatch window as the reference to them. */
34375 if (group == disp_jcc || group == disp_cmp)
34376 return false;
34378 /* Check nonrestricted. */
34379 if (group == disp_no_group || group == disp_branch)
34380 return true;
34382 /* Get last dispatch window. */
34383 if (window_list_next)
34384 window_list = window_list_next;
34386 if (window_list->window_num == 1)
34388 sum = window_list->prev->window_size + window_list->window_size;
34390 if (sum == 32
34391 || (min_insn_size (insn) + sum) >= 48)
34392 /* Window 1 is full.  Go to the next window. */
34393 return true;
34396 num_restrict = count_num_restricted (insn, window_list);
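/* count_num_restricted returns BIG when a per-window resource limit
   (immediates, loads or stores) would be exceeded, so this comparison
   rejects INSN once the group's allowance for the window is used up.  */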
34398 if (num_restrict > num_allowable_groups[group])
34399 return false;
34401 /* See if it fits in the first window. */
34402 if (window_list->window_num == 0)
34404 /* The first window should have only single- and double-path
34405    uops. */
34406 if (path == path_double
34407 && (window_list->num_uops + 2) > MAX_INSN)
34408 return false;
34409 else if (path != path_single)
34410 return false;
34412 return true;
34415 /* Add an instruction INSN with NUM_UOPS micro-operations to the
34416 dispatch window WINDOW_LIST. */
34418 static void
34419 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
34421 int byte_len = min_insn_size (insn);
34422 int num_insn = window_list->num_insn;
34423 int imm_size;
34424 sched_insn_info *window = window_list->window;
34425 enum dispatch_group group = get_insn_group (insn);
34426 enum insn_path path = get_insn_path (insn);
34427 int num_imm_operand;
34428 int num_imm32_operand;
34429 int num_imm64_operand;
34431 if (!window_list->violation && group != disp_cmp
34432 && !fits_dispatch_window (insn))
34433 window_list->violation = true;
34435 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34436 &num_imm64_operand);
34438 /* Initialize window with new instruction. */
34439 window[num_insn].insn = insn;
34440 window[num_insn].byte_len = byte_len;
34441 window[num_insn].group = group;
34442 window[num_insn].path = path;
34443 window[num_insn].imm_bytes = imm_size;
34445 window_list->window_size += byte_len;
34446 window_list->num_insn = num_insn + 1;
34447 window_list->num_uops = window_list->num_uops + num_uops;
34448 window_list->imm_size += imm_size;
34449 window_list->num_imm += num_imm_operand;
34450 window_list->num_imm_32 += num_imm32_operand;
34451 window_list->num_imm_64 += num_imm64_operand;
34453 if (group == disp_store)
34454 window_list->num_stores += 1;
34455 else if (group == disp_load
34456 || group == disp_prefetch)
34457 window_list->num_loads += 1;
34458 else if (group == disp_load_store)
34460 window_list->num_stores += 1;
34461 window_list->num_loads += 1;
34465 /* Add a scheduled instruction, INSN, to the current dispatch window.
34466    If the total bytes of instructions or the number of instructions in
34467    the window exceeds the allowable limit, allocate a new window. */
34469 static void
34470 add_to_dispatch_window (rtx insn)
34472 int byte_len;
34473 dispatch_windows *window_list;
34474 dispatch_windows *next_list;
34475 dispatch_windows *window0_list;
34476 enum insn_path path;
34477 enum dispatch_group insn_group;
34478 bool insn_fits;
34479 int num_insn;
34480 int num_uops;
34481 int window_num;
34482 int insn_num_uops;
34483 int sum;
34485 if (INSN_CODE (insn) < 0)
34486 return;
34488 byte_len = min_insn_size (insn);
34489 window_list = dispatch_window_list;
34490 next_list = window_list->next;
34491 path = get_insn_path (insn);
34492 insn_group = get_insn_group (insn);
34494 /* Get the last dispatch window. */
34495 if (next_list)
34496 window_list = dispatch_window_list->next;
34498 if (path == path_single)
34499 insn_num_uops = 1;
34500 else if (path == path_double)
34501 insn_num_uops = 2;
34502 else
34503 insn_num_uops = (int) path;
34505 /* If the current window is full, get a new window.
34506    Window number zero is full if MAX_INSN uops are scheduled in it.
34507    Window number one is full if window zero's bytes plus window
34508    one's bytes equal 32, if adding the new instruction's bytes to
34509    that total makes it 48 or more, or if it already has MAX_INSN
34510    instructions in it. */
34511 num_insn = window_list->num_insn;
34512 num_uops = window_list->num_uops;
34513 window_num = window_list->window_num;
34514 insn_fits = fits_dispatch_window (insn);
34516 if (num_insn >= MAX_INSN
34517 || num_uops + insn_num_uops > MAX_INSN
34518 || !(insn_fits))
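/* The current window is full or INSN does not fit: flip to the
   other window (0 <-> 1) and continue there.  */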
34520 window_num = ~window_num & 1;
34521 window_list = allocate_next_window (window_num);
34524 if (window_num == 0)
34526 add_insn_window (insn, window_list, insn_num_uops);
34527 if (window_list->num_insn >= MAX_INSN
34528 && insn_group == disp_branch)
34530 process_end_window ();
34531 return;
34534 else if (window_num == 1)
34536 window0_list = window_list->prev;
34537 sum = window0_list->window_size + window_list->window_size;
34538 if (sum == 32
34539 || (byte_len + sum) >= 48)
34541 process_end_window ();
34542 window_list = dispatch_window_list;
34545 add_insn_window (insn, window_list, insn_num_uops);
34547 else
34548 gcc_unreachable ();
34550 if (is_end_basic_block (insn_group))
34552 /* The end of the basic block has been reached; do end-of-basic-block processing. */
34553 process_end_window ();
34554 return;
34558 /* Print the dispatch window, WINDOW_NUM, to FILE. */
34560 DEBUG_FUNCTION static void
34561 debug_dispatch_window_file (FILE *file, int window_num)
34563 dispatch_windows *list;
34564 int i;
34566 if (window_num == 0)
34567 list = dispatch_window_list;
34568 else
34569 list = dispatch_window_list1;
34571 fprintf (file, "Window #%d:\n", list->window_num);
34572 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
34573 list->num_insn, list->num_uops, list->window_size);
34574 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34575 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
34577 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
34578 list->num_stores);
34579 fprintf (file, " insn info:\n");
34581 for (i = 0; i < MAX_INSN; i++)
34583 if (!list->window[i].insn)
34584 break;
34585 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
34586 i, group_name[list->window[i].group],
34587 i, (void *)list->window[i].insn,
34588 i, list->window[i].path,
34589 i, list->window[i].byte_len,
34590 i, list->window[i].imm_bytes);
34594 /* Print the dispatch window WINDOW_NUM to stdout. */
34596 DEBUG_FUNCTION void
34597 debug_dispatch_window (int window_num)
34599 debug_dispatch_window_file (stdout, window_num);
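/* Usage note (not from the source): these DEBUG_FUNCTION helpers are
   meant to be invoked by hand from a debugger, e.g.
   "call debug_dispatch_window (0)" under gdb.  */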
34602 /* Print INSN dispatch information to FILE. */
34604 DEBUG_FUNCTION static void
34605 debug_insn_dispatch_info_file (FILE *file, rtx insn)
34607 int byte_len;
34608 enum insn_path path;
34609 enum dispatch_group group;
34610 int imm_size;
34611 int num_imm_operand;
34612 int num_imm32_operand;
34613 int num_imm64_operand;
34615 if (INSN_CODE (insn) < 0)
34616 return;
34618 byte_len = min_insn_size (insn);
34619 path = get_insn_path (insn);
34620 group = get_insn_group (insn);
34621 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34622 &num_imm64_operand);
34624 fprintf (file, " insn info:\n");
34625 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
34626 group_name[group], path, byte_len);
34627 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34628 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
34631 /* Print to STDOUT the status of the ready list with respect to
34632    the dispatch windows. */
34634 DEBUG_FUNCTION void
34635 debug_ready_dispatch (void)
34637 int i;
34638 int no_ready = number_in_ready ();
34640 fprintf (stdout, "Number of ready: %d\n", no_ready);
34642 for (i = 0; i < no_ready; i++)
34643 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
34646 /* This routine is the driver of the dispatch scheduler. */
34648 static void
34649 do_dispatch (rtx insn, int mode)
34651 if (mode == DISPATCH_INIT)
34652 init_dispatch_sched ();
34653 else if (mode == ADD_TO_DISPATCH_WINDOW)
34654 add_to_dispatch_window (insn);
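/* Illustration only, not part of i386.c: a minimal sketch of how a
   pass could drive the dispatch tracker.  example_drive_dispatch and
   its FIRST argument are hypothetical; the real callers are the
   scheduler hooks.  */
#if 0
static void
example_drive_dispatch (rtx first)
{
  rtx insn;
  do_dispatch (NULL_RTX, DISPATCH_INIT);            /* build both windows */
  for (insn = first; insn; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn))
      do_dispatch (insn, ADD_TO_DISPATCH_WINDOW);   /* track each insn */
}
#endif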
34657 /* If dispatch scheduling is enabled, answer the dispatch query ACTION for INSN; otherwise return FALSE. */
34659 static bool
34660 has_dispatch (rtx insn, int action)
34662 if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
34663 switch (action)
34665 default:
34666 return false;
34668 case IS_DISPATCH_ON:
34669 return true;
34672 case IS_CMP:
34673 return is_cmp (insn);
34675 case DISPATCH_VIOLATION:
34676 return dispatch_violation ();
34678 case FITS_DISPATCH_WINDOW:
34679 return fits_dispatch_window (insn);
34682 return false;
34685 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
34686 place emms and femms instructions. */
34688 static enum machine_mode
34689 ix86_preferred_simd_mode (enum machine_mode mode)
34691 /* Disable the double-precision vectorizer if requested. */
34692 if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
34693 return word_mode;
34695 if (!TARGET_AVX && !TARGET_SSE)
34696 return word_mode;
34698 switch (mode)
34700 case SFmode:
34701 return TARGET_AVX ? V8SFmode : V4SFmode;
34702 case DFmode:
34703 return TARGET_AVX ? V4DFmode : V2DFmode;
34704 case DImode:
34705 return V2DImode;
34706 case SImode:
34707 return V4SImode;
34708 case HImode:
34709 return V8HImode;
34710 case QImode:
34711 return V16QImode;
34713 default:;
34716 return word_mode;
34719 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
34720 vectors. */
34722 static unsigned int
34723 ix86_autovectorize_vector_sizes (void)
34725 return TARGET_AVX ? 32 | 16 : 0;
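/* The return value is interpreted as a bitmask of vector sizes in
   bytes to try; 32 | 16 asks the vectorizer to try 256-bit vectors
   first and then 128-bit vectors (descriptive note, assuming the
   documented contract of this hook).  */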
34728 /* Initialize the GCC target structure. */
34729 #undef TARGET_RETURN_IN_MEMORY
34730 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
34732 #undef TARGET_LEGITIMIZE_ADDRESS
34733 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
34735 #undef TARGET_ATTRIBUTE_TABLE
34736 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
34737 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34738 # undef TARGET_MERGE_DECL_ATTRIBUTES
34739 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
34740 #endif
34742 #undef TARGET_COMP_TYPE_ATTRIBUTES
34743 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
34745 #undef TARGET_INIT_BUILTINS
34746 #define TARGET_INIT_BUILTINS ix86_init_builtins
34747 #undef TARGET_BUILTIN_DECL
34748 #define TARGET_BUILTIN_DECL ix86_builtin_decl
34749 #undef TARGET_EXPAND_BUILTIN
34750 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
34752 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
34753 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
34754 ix86_builtin_vectorized_function
34756 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
34757 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
34759 #undef TARGET_BUILTIN_RECIPROCAL
34760 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
34762 #undef TARGET_ASM_FUNCTION_EPILOGUE
34763 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
34765 #undef TARGET_ENCODE_SECTION_INFO
34766 #ifndef SUBTARGET_ENCODE_SECTION_INFO
34767 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
34768 #else
34769 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
34770 #endif
34772 #undef TARGET_ASM_OPEN_PAREN
34773 #define TARGET_ASM_OPEN_PAREN ""
34774 #undef TARGET_ASM_CLOSE_PAREN
34775 #define TARGET_ASM_CLOSE_PAREN ""
34777 #undef TARGET_ASM_BYTE_OP
34778 #define TARGET_ASM_BYTE_OP ASM_BYTE
34780 #undef TARGET_ASM_ALIGNED_HI_OP
34781 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
34782 #undef TARGET_ASM_ALIGNED_SI_OP
34783 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
34784 #ifdef ASM_QUAD
34785 #undef TARGET_ASM_ALIGNED_DI_OP
34786 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
34787 #endif
34789 #undef TARGET_PROFILE_BEFORE_PROLOGUE
34790 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
34792 #undef TARGET_ASM_UNALIGNED_HI_OP
34793 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
34794 #undef TARGET_ASM_UNALIGNED_SI_OP
34795 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
34796 #undef TARGET_ASM_UNALIGNED_DI_OP
34797 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
34799 #undef TARGET_PRINT_OPERAND
34800 #define TARGET_PRINT_OPERAND ix86_print_operand
34801 #undef TARGET_PRINT_OPERAND_ADDRESS
34802 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
34803 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
34804 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
34805 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
34806 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
34808 #undef TARGET_SCHED_INIT_GLOBAL
34809 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
34810 #undef TARGET_SCHED_ADJUST_COST
34811 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
34812 #undef TARGET_SCHED_ISSUE_RATE
34813 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
34814 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
34815 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
34816 ia32_multipass_dfa_lookahead
34818 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
34819 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
34821 #ifdef HAVE_AS_TLS
34822 #undef TARGET_HAVE_TLS
34823 #define TARGET_HAVE_TLS true
34824 #endif
34825 #undef TARGET_CANNOT_FORCE_CONST_MEM
34826 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
34827 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
34828 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
34830 #undef TARGET_DELEGITIMIZE_ADDRESS
34831 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
34833 #undef TARGET_MS_BITFIELD_LAYOUT_P
34834 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
34836 #if TARGET_MACHO
34837 #undef TARGET_BINDS_LOCAL_P
34838 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
34839 #endif
34840 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34841 #undef TARGET_BINDS_LOCAL_P
34842 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
34843 #endif
34845 #undef TARGET_ASM_OUTPUT_MI_THUNK
34846 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
34847 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
34848 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
34850 #undef TARGET_ASM_FILE_START
34851 #define TARGET_ASM_FILE_START x86_file_start
34853 #undef TARGET_DEFAULT_TARGET_FLAGS
34854 #define TARGET_DEFAULT_TARGET_FLAGS \
34855 (TARGET_DEFAULT \
34856 | TARGET_SUBTARGET_DEFAULT \
34857 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
34859 #undef TARGET_HANDLE_OPTION
34860 #define TARGET_HANDLE_OPTION ix86_handle_option
34862 #undef TARGET_OPTION_OVERRIDE
34863 #define TARGET_OPTION_OVERRIDE ix86_option_override
34864 #undef TARGET_OPTION_OPTIMIZATION_TABLE
34865 #define TARGET_OPTION_OPTIMIZATION_TABLE ix86_option_optimization_table
34866 #undef TARGET_OPTION_INIT_STRUCT
34867 #define TARGET_OPTION_INIT_STRUCT ix86_option_init_struct
34869 #undef TARGET_REGISTER_MOVE_COST
34870 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
34871 #undef TARGET_MEMORY_MOVE_COST
34872 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
34873 #undef TARGET_RTX_COSTS
34874 #define TARGET_RTX_COSTS ix86_rtx_costs
34875 #undef TARGET_ADDRESS_COST
34876 #define TARGET_ADDRESS_COST ix86_address_cost
34878 #undef TARGET_FIXED_CONDITION_CODE_REGS
34879 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
34880 #undef TARGET_CC_MODES_COMPATIBLE
34881 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
34883 #undef TARGET_MACHINE_DEPENDENT_REORG
34884 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
34886 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
34887 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
34889 #undef TARGET_BUILD_BUILTIN_VA_LIST
34890 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
34892 #undef TARGET_ENUM_VA_LIST_P
34893 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
34895 #undef TARGET_FN_ABI_VA_LIST
34896 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
34898 #undef TARGET_CANONICAL_VA_LIST_TYPE
34899 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
34901 #undef TARGET_EXPAND_BUILTIN_VA_START
34902 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
34904 #undef TARGET_MD_ASM_CLOBBERS
34905 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
34907 #undef TARGET_PROMOTE_PROTOTYPES
34908 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
34909 #undef TARGET_STRUCT_VALUE_RTX
34910 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
34911 #undef TARGET_SETUP_INCOMING_VARARGS
34912 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
34913 #undef TARGET_MUST_PASS_IN_STACK
34914 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
34915 #undef TARGET_FUNCTION_ARG_ADVANCE
34916 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
34917 #undef TARGET_FUNCTION_ARG
34918 #define TARGET_FUNCTION_ARG ix86_function_arg
34919 #undef TARGET_FUNCTION_ARG_BOUNDARY
34920 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
34921 #undef TARGET_PASS_BY_REFERENCE
34922 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
34923 #undef TARGET_INTERNAL_ARG_POINTER
34924 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
34925 #undef TARGET_UPDATE_STACK_BOUNDARY
34926 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
34927 #undef TARGET_GET_DRAP_RTX
34928 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
34929 #undef TARGET_STRICT_ARGUMENT_NAMING
34930 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
34931 #undef TARGET_STATIC_CHAIN
34932 #define TARGET_STATIC_CHAIN ix86_static_chain
34933 #undef TARGET_TRAMPOLINE_INIT
34934 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
34935 #undef TARGET_RETURN_POPS_ARGS
34936 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
34938 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
34939 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
34941 #undef TARGET_SCALAR_MODE_SUPPORTED_P
34942 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
34944 #undef TARGET_VECTOR_MODE_SUPPORTED_P
34945 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
34947 #undef TARGET_C_MODE_FOR_SUFFIX
34948 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
34950 #ifdef HAVE_AS_TLS
34951 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
34952 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
34953 #endif
34955 #ifdef SUBTARGET_INSERT_ATTRIBUTES
34956 #undef TARGET_INSERT_ATTRIBUTES
34957 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
34958 #endif
34960 #undef TARGET_MANGLE_TYPE
34961 #define TARGET_MANGLE_TYPE ix86_mangle_type
34963 #undef TARGET_STACK_PROTECT_FAIL
34964 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
34966 #undef TARGET_SUPPORTS_SPLIT_STACK
34967 #define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
34969 #undef TARGET_FUNCTION_VALUE
34970 #define TARGET_FUNCTION_VALUE ix86_function_value
34972 #undef TARGET_FUNCTION_VALUE_REGNO_P
34973 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
34975 #undef TARGET_SECONDARY_RELOAD
34976 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
34978 #undef TARGET_PREFERRED_RELOAD_CLASS
34979 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
34980 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
34981 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
34982 #undef TARGET_CLASS_LIKELY_SPILLED_P
34983 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
34985 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
34986 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
34987 ix86_builtin_vectorization_cost
34988 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
34989 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
34990 ix86_vectorize_builtin_vec_perm
34991 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
34992 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
34993 ix86_vectorize_builtin_vec_perm_ok
34994 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
34995 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
34996 ix86_preferred_simd_mode
34997 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
34998 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
34999 ix86_autovectorize_vector_sizes
35001 #undef TARGET_SET_CURRENT_FUNCTION
35002 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
35004 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
35005 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
35007 #undef TARGET_OPTION_SAVE
35008 #define TARGET_OPTION_SAVE ix86_function_specific_save
35010 #undef TARGET_OPTION_RESTORE
35011 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
35013 #undef TARGET_OPTION_PRINT
35014 #define TARGET_OPTION_PRINT ix86_function_specific_print
35016 #undef TARGET_CAN_INLINE_P
35017 #define TARGET_CAN_INLINE_P ix86_can_inline_p
35019 #undef TARGET_EXPAND_TO_RTL_HOOK
35020 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
35022 #undef TARGET_LEGITIMATE_ADDRESS_P
35023 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
35025 #undef TARGET_IRA_COVER_CLASSES
35026 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
35028 #undef TARGET_FRAME_POINTER_REQUIRED
35029 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
35031 #undef TARGET_CAN_ELIMINATE
35032 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
35034 #undef TARGET_EXTRA_LIVE_ON_ENTRY
35035 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
35037 #undef TARGET_ASM_CODE_END
35038 #define TARGET_ASM_CODE_END ix86_code_end
35040 #undef TARGET_CONDITIONAL_REGISTER_USAGE
35041 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
35043 struct gcc_target targetm = TARGET_INITIALIZER;
35045 #include "gt-i386.h"