/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 92, 94-98, 1999 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include <setjmp.h>
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"

#ifdef EXTRA_CONSTRAINT
/* If EXTRA_CONSTRAINT is defined, then the 'S'
   constraint in REG_CLASS_FROM_LETTER will no longer work, and various
   asm statements that need 'S' for class SIREG will break.  */
 error EXTRA_CONSTRAINT conflicts with S constraint letter
/* The previous line used to be #error, but some compilers barf
   even if the conditional was untrue.  */
#endif

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif

/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  6,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  23,					/* cost of a divide/mod */
  15					/* "large" insn */
};

struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  12,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  40,					/* cost of a divide/mod */
  15					/* "large" insn */
};

struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  11,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  25,					/* cost of a divide/mod */
  8					/* "large" insn */
};

struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  1,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  17,					/* cost of a divide/mod */
  8					/* "large" insn */
};

struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  18,					/* cost of a divide/mod */
  8					/* "large" insn */
};

struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)

const int x86_use_leave = m_386 | m_K6;
const int x86_push_memory = m_386 | m_K6;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = 0 /* m_386 | m_PPRO | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT;
const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
const int x86_use_any_reg = m_486;
const int x86_cmove = m_PPRO;
const int x86_deep_branch = m_PPRO | m_K6;
const int x86_use_sahf = m_PPRO | m_K6;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~m_PPRO;
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;

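/* A usage sketch for the bitmasks above (illustration only; the real
   tests live in i386.h, where each TARGET_* feature macro checks the
   bit for the processor being scheduled for, along the lines of

     #define TARGET_CMOVE (x86_cmove & (1 << ix86_cpu))

   so that, e.g., ix86_cpu == PROCESSOR_PENTIUMPRO makes TARGET_CMOVE
   nonzero while the other processors test false).  */
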
#define AT_BP(mode) (gen_rtx_MEM ((mode), frame_pointer_rtx))

const char * const hi_reg_name[] = HI_REGISTER_NAMES;
const char * const qi_reg_name[] = QI_REGISTER_NAMES;
const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  INDEX_REGS,
  /* flags, fpsr */
  NO_REGS, NO_REGS
};

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 2

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
};

#define ix86_stack_locals (current_function->machine->stack_locals)

/* which cpu are we scheduling for */
enum processor_type ix86_cpu;

/* which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */

/* Register allocation order */
const char *ix86_reg_alloc_order;
static char regs_allocated[FIRST_PSEUDO_REGISTER];

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
int ix86_align_funcs;
const char *ix86_align_funcs_string;

/* Power of two alignment for loops.  */
int ix86_align_loops;

/* Power of two alignment for non-loop jumps.  */
int ix86_align_jumps;

static void output_pic_addr_const PROTO ((FILE *, rtx, int));
static void put_condition_code PROTO ((enum rtx_code, enum machine_mode,
				       int, int, FILE *));
static enum rtx_code unsigned_comparison PROTO ((enum rtx_code code));
static rtx ix86_expand_int_compare PROTO ((enum rtx_code, rtx, rtx));
static rtx ix86_expand_fp_compare PROTO ((enum rtx_code, rtx, rtx, int));
static rtx ix86_expand_compare PROTO ((enum rtx_code, int));
static rtx gen_push PROTO ((rtx));
static int memory_address_length PROTO ((rtx addr));
static int ix86_flags_dependant PROTO ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PROTO ((rtx, rtx, enum attr_type));
static int ix86_safe_length PROTO ((rtx));
static enum attr_memory ix86_safe_memory PROTO ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PROTO ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PROTO ((rtx));
static void ix86_dump_ppro_packet PROTO ((FILE *));
static void ix86_reorder_insn PROTO ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PROTO ((rtx *, rtx *, enum attr_pent_pair,
					 rtx));
static void ix86_init_machine_status PROTO ((struct function *));
static void ix86_mark_machine_status PROTO ((struct function *));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      struct processor_costs *cost;	/* Processor costs */
      int target_enable;		/* Target flags to enable.  */
      int target_disable;		/* Target flags to disable.  */
      int align_loop;			/* Default alignments.  */
      int align_jump;
      int align_func;
      int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 2, 2, 2, 1},
      {&i486_cost, 0, 0, 4, 4, 4, 1},
      {&pentium_cost, 0, 0, -4, -4, -4, 1},
      {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
      {&k6_cost, 0, 0, -5, -5, 4, 1}
    };

  static struct pta
    {
      char *name;		/* processor name or nickname.  */
      enum processor_type processor;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386},
      {"i486", PROCESSOR_I486},
      {"i586", PROCESSOR_PENTIUM},
      {"pentium", PROCESSOR_PENTIUM},
      {"i686", PROCESSOR_PENTIUMPRO},
      {"pentiumpro", PROCESSOR_PENTIUMPRO},
      {"k6", PROCESSOR_K6},
    };

  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  ix86_arch = PROCESSOR_I386;
  ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;

  if (ix86_arch_string != 0)
    {
      int i;
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
	  {
	    ix86_arch = processor_alias_table[i].processor;
	    /* Default cpu tuning to the architecture.  */
	    ix86_cpu = ix86_arch;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -march= switch", ix86_arch_string);
    }

  if (ix86_cpu_string != 0)
    {
      int i;
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
	  {
	    ix86_cpu = processor_alias_table[i].processor;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
    }

  ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;

  /* Validate registers in register allocation order.  */
  if (ix86_reg_alloc_order)
    {
      int i, ch;
      for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a':	regno = 0;	break;
	    case 'd':	regno = 1;	break;
	    case 'c':	regno = 2;	break;
	    case 'b':	regno = 3;	break;
	    case 'S':	regno = 4;	break;
	    case 'D':	regno = 5;	break;
	    case 'B':	regno = 6;	break;

	    default:	fatal ("Register '%c' is unknown", ch);
	    }

	  if (regs_allocated[regno])
	    fatal ("Register '%c' already specified in allocation order", ch);

	  regs_allocated[regno] = 1;
	}
    }

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      ix86_regparm = atoi (ix86_regparm_string);
      if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
	fatal ("-mregparm=%d is not between 0 and %d",
	       ix86_regparm, REGPARM_MAX);
    }

  /* Validate -malign-loops= value, or provide default.  */
  ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
  if (ix86_align_loops_string)
    {
      ix86_align_loops = atoi (ix86_align_loops_string);
      if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
	fatal ("-malign-loops=%d is not between 0 and %d",
	       ix86_align_loops, MAX_CODE_ALIGN);
    }

  /* Validate -malign-jumps= value, or provide default.  */
  ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
  if (ix86_align_jumps_string)
    {
      ix86_align_jumps = atoi (ix86_align_jumps_string);
      if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
	fatal ("-malign-jumps=%d is not between 0 and %d",
	       ix86_align_jumps, MAX_CODE_ALIGN);
    }

  /* Validate -malign-functions= value, or provide default.  */
  ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
  if (ix86_align_funcs_string)
    {
      ix86_align_funcs = atoi (ix86_align_funcs_string);
      if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
	fatal ("-malign-functions=%d is not between 0 and %d",
	       ix86_align_funcs, MAX_CODE_ALIGN);
    }

  /* Validate -mpreferred_stack_boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128.  */
  ix86_preferred_stack_boundary = 128;
  if (ix86_preferred_stack_boundary_string)
    {
      int i = atoi (ix86_preferred_stack_boundary_string);
      if (i < 2 || i > 31)
	fatal ("-mpreferred_stack_boundary=%d is not between 2 and 31", i);
      ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

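  /* Worked example: -mpreferred_stack_boundary=4 gives i == 4, so
     (1 << 4) * BITS_PER_UNIT == 16 * 8 == 128 bits, i.e. the 16 byte
     stack alignment needed for the SSE __m128 type mentioned above.  */
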
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      ix86_branch_cost = atoi (ix86_branch_cost_string);
      if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
	fatal ("-mbranch-cost=%d is not between 0 and 5",
	       ix86_branch_cost);
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_fast_math)
    target_flags &= ~MASK_IEEE_FP;

  /* If we're planning on using `loop', use it.  */
  if (TARGET_USE_LOOP && optimize)
    flag_branch_on_count_reg = 1;
}

/* A C statement (sans semicolon) to choose the order in which to
   allocate hard registers for pseudo-registers local to a basic
   block.

   Store the desired register order in the array `reg_alloc_order'.
   Element 0 should be the register to allocate first; element 1, the
   next register; and so on.

   The macro body should not assume anything about the contents of
   `reg_alloc_order' before execution of the macro.

   On most machines, it is not necessary to define this macro.  */

void
order_regs_for_local_alloc ()
{
  int i, ch, order;

  /* User specified the register allocation order.  */

  if (ix86_reg_alloc_order)
    {
      for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a':	regno = 0;	break;
	    case 'd':	regno = 1;	break;
	    case 'c':	regno = 2;	break;
	    case 'b':	regno = 3;	break;
	    case 'S':	regno = 4;	break;
	    case 'D':	regno = 5;	break;
	    case 'B':	regno = 6;	break;
	    }

	  reg_alloc_order[order++] = regno;
	}

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	{
	  if (! regs_allocated[i])
	    reg_alloc_order[order++] = i;
	}
    }

  /* If user did not specify a register allocation order, use natural order.  */
  else
    {
      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	reg_alloc_order[i] = i;
    }
}

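/* Illustration (assumed command line; the option string itself is
   validated in override_options above): with -mreg-alloc=adb the loop
   above yields reg_alloc_order = { 0, 1, 3, ... }, i.e. %eax, %edx,
   %ebx first, followed by every hard register not named, in natural
   order.  */
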
void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
}

/* Return nonzero if the rtx is known aligned.  */
/* ??? Unused.  */

int
ix86_aligned_p (op)
     rtx op;
{
  struct ix86_address parts;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 4)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 4)
	return 0;
    }
  if (parts.disp)
    {
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}

/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for DECL.  The attributes in ATTRIBUTES have previously been
   assigned to DECL.  */

int
ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
     tree decl ATTRIBUTE_UNUSED;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier ATTRIBUTE_UNUSED;
     tree args ATTRIBUTE_UNUSED;
{
  return 0;
}

/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for TYPE.  The attributes in ATTRIBUTES have previously been
   assigned to TYPE.  */

int
ix86_valid_type_attribute_p (type, attributes, identifier, args)
     tree type;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier;
     tree args;
{
  if (TREE_CODE (type) != FUNCTION_TYPE
      && TREE_CODE (type) != METHOD_TYPE
      && TREE_CODE (type) != FIELD_DECL
      && TREE_CODE (type) != TYPE_DECL)
    return 0;

  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  if (is_attribute_p ("stdcall", identifier))
    return (args == NULL_TREE);

  /* Cdecl attribute says the callee is a normal C declaration.  */
  if (is_attribute_p ("cdecl", identifier))
    return (args == NULL_TREE);

  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  if (is_attribute_p ("regparm", identifier))
    {
      tree cst;

      if (! args || TREE_CODE (args) != TREE_LIST
	  || TREE_CHAIN (args) != NULL_TREE
	  || TREE_VALUE (args) == NULL_TREE)
	return 0;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	return 0;

      if (TREE_INT_CST_HIGH (cst) != 0
	  || TREE_INT_CST_LOW (cst) < 0
	  || TREE_INT_CST_LOW (cst) > REGPARM_MAX)
	return 0;

      return 1;
    }

  return 0;
}

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}

/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
    {
      /* Stdcall functions will pop the stack if not variable args.  */
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
	rtd = 1;

      if (rtd
	  && (TYPE_ARG_TYPES (funtype) == NULL_TREE
	      || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		  == void_type_node)))
	return size;
    }

  /* Lose any fake structure return argument.  */
  if (aggregate_value_p (TREE_TYPE (funtype)))
    return GET_MODE_SIZE (Pmode);

  return 0;
}

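/* Worked example (illustration): for

     void __attribute__ ((stdcall)) f (int a, int b, int c);

   SIZE is 12 and the argument list is fixed, so 12 is returned and the
   callee pops its own arguments with `ret $12'.  A cdecl or varargs
   function yields 0, leaving the caller to adjust %esp.  */
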
/* Argument support functions.  */

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (fntype)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    cum->nregs = 0;
	}
    }

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* whether or not the argument was named */
{
  int bytes
    = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

  cum->words += words;
  cum->nregs -= words;
  cum->regno += words;

  if (cum->nregs <= 0)
    {
      cum->nregs = 0;
      cum->regno = 0;
    }

  return;
}

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

struct rtx_def *
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  int bytes
    = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  switch (mode)
    {
    /* For now, pass fp/complex values on the stack.  */
    default:
      break;

    case BLKmode:
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	ret = gen_rtx_REG (mode, cum->regno);
      break;
    }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}

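/* Illustration (assumed declaration): with regparm (3) in effect, the
   first three integer arguments of a fixed-arg function are passed in
   registers 0, 1 and 2 (%eax, %edx, %ecx) as cum->regno advances;
   anything beyond that, and all fp/complex values, go on the stack.  */
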
/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && XINT (op, 1) >= 6
	      && XINT (op, 1) <= 7))
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != 7)
	return 0;

      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}

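/* Examples of operands accepted above (RTL for illustration):

     (symbol_ref "x")
     (const (plus (symbol_ref "x") (const_int 4)))
     (const (plus (unspec [(symbol_ref "x")] 7) (const_int 4)))

   the last being an offset @GOTOFF reference; an offset against a
   @GOT reference (unspec 6) is rejected.  */
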
/* Return true if the operand contains a @GOT or @GOTOFF reference.  */

int
pic_symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == CONST)
    {
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
    }
  return 0;
}

/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);

  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}

/* Like call_insn_operand but allow (mem (symbol_ref ...)) even if pic.  */

int
expander_call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == SYMBOL_REF)
    return 1;

  return call_insn_operand (op, mode);
}

int
constant_call_address_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == MEM && CONSTANT_ADDRESS_P (XEXP (op, 0));
}

/* Match exactly zero and one.  */

int
const0_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return op == CONST0_RTX (mode);
}

int
const1_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return op == const1_rtx;
}

/* Match 2, 4, or 8.  Used for leal multiplicands.  */

int
const248_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
}

/* True if this is a constant appropriate for an increment or decrement.  */

int
incdec_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (op == const1_rtx || op == constm1_rtx)
    return 1;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
    return 1;
  if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
    return 1;
  if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
    return 1;
  return 0;
}

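/* Example: in HImode, 0xffff is the unsigned spelling of -1, so
   (const_int 65535) is accepted above just like (const_int -1) and
   can feed the same inc/dec patterns.  */
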
/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is
   a register operand.

   This is used to prevent esp from being used as an index reg.
   Which would only happen in pathological cases.  */

int
reg_no_sp_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == stack_pointer_rtx || t == arg_pointer_rtx)
    return 0;

  return register_operand (op, mode);
}

/* Return true if op is a Q_REGS class register.  */

int
q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return QI_REG_P (op);
}

/* Return true if op is a NON_Q_REGS class register.  */

int
non_q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return NON_QI_REG_P (op);
}

/* Return 1 if OP is a comparison operator that can use the condition code
   generated by a logical operation, which characteristically does not set
   overflow or carry.  To be used with CCNOmode.  */

int
no_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && GET_RTX_CLASS (GET_CODE (op)) == '<'
	  && GET_CODE (op) != LE
	  && GET_CODE (op) != GT);
}

/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && GET_RTX_CLASS (GET_CODE (op)) == '<'
	  && GET_CODE (op) == unsigned_condition (GET_CODE (op)));
}

/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */

int
cmp_fp_expander_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (GET_CODE (op) == CONST_DOUBLE)
    return 1;
  return general_operand (op, mode);
}

/* Match an SI or HImode register for a zero_extract.  */

int
ext_register_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;
  return register_operand (op, VOIDmode);
}

/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}

int
mult_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == MULT;
}

int
div_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == DIV;
}

int
arith_or_logical_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
	      || GET_RTX_CLASS (GET_CODE (op)) == '2'));
}

/* Returns 1 if OP is memory operand with a displacement.  */

int
memory_displacement_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}

/* To avoid problems when jump re-emits comparisons like testqi_ext_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (general_operand (op, mode))
    return 1;

  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}

/* Returns 1 if OP is memory operand that can not be represented by the
   modRM array.  */

int
long_memory_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}

/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE)
    return -1;

#if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
  {
    REAL_VALUE_TYPE d;
    jmp_buf handler;
    int is0, is1;

    if (setjmp (handler))
      return 0;

    set_float_handler (handler);
    REAL_VALUE_FROM_CONST_DOUBLE (d, x);
    is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
    is1 = REAL_VALUES_EQUAL (d, dconst1);
    set_float_handler (NULL_PTR);

    if (is0)
      return 1;

    if (is1)
      return 2;

    /* Note that on the 80387, other constants, such as pi,
       are much slower to load as standard constants
       than to load from doubles in memory!  */
    /* ??? Not true on K6: all constants are equal cost.  */
  }
#endif

  return 0;
}

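/* Usage sketch (illustration; the actual callers are the fp move
   patterns in i386.md): a return value of 1 means the constant is 0.0
   and can be loaded with fldz, 2 means 1.0 and fld1; any other
   CONST_DOUBLE falls back to a load from memory.  */
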
/* Returns 1 if OP contains a symbol reference */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}

/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  int regno;
  int nregs = 0;
  int reglimit = (frame_pointer_needed
		  ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif

  if (! reload_completed)
    return 0;

  for (regno = reglimit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && ! call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      nregs++;

  return nregs == 0 || ! frame_pointer_needed;
}

static char *pic_label_name;
static int pic_label_output;
static char *global_offset_table_name;

/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

void
asm_output_function_prefix (file, name)
     FILE *file;
     char *name ATTRIBUTE_UNUSED;
{
  rtx xops[2];
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  xops[0] = pic_offset_table_rtx;
  xops[1] = stack_pointer_rtx;

  /* Deep branch prediction favors having a return for every call.  */
  if (pic_reg_used && TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (!pic_label_output)
	{
	  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
	     internal (non-global) label that's being emitted, it didn't make
	     sense to have .type information for local labels.  This caused
	     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
	     me debug info for a label that you're declaring non-global?) this
	     was changed to call ASM_OUTPUT_LABEL() instead.  */

	  ASM_OUTPUT_LABEL (file, pic_label_name);

	  xops[1] = gen_rtx_MEM (SImode, xops[1]);
	  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
	  output_asm_insn ("ret", xops);

	  pic_label_output = 1;
	}
    }
}

void
load_pic_register ()
{
  rtx gotsym, pclab;

  if (global_offset_table_name == NULL)
    {
      global_offset_table_name =
	ggc_alloc_string ("_GLOBAL_OFFSET_TABLE_", 21);
      ggc_add_string_root (&global_offset_table_name, 1);
    }
  gotsym = gen_rtx_SYMBOL_REF (Pmode, global_offset_table_name);

  if (TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (pic_label_name == NULL)
	{
	  pic_label_name = ggc_alloc_string (NULL, 32);
	  ggc_add_string_root (&pic_label_name, 1);
	  ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
	}
      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
    }
  else
    {
      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
    }

  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));

  if (! TARGET_DEEP_BRANCH_PREDICTION)
    emit_insn (gen_popsi1 (pic_offset_table_rtx));

  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
}

/* Generate an SImode "push" pattern for input ARG.  */

static rtx
gen_push (arg)
     rtx arg;
{
  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (SImode,
				   gen_rtx_PRE_DEC (SImode,
						    stack_pointer_rtx)),
		      arg);
}

/* Compute the size of local storage taking into consideration the
   desired stack alignment which is to be maintained.  Also determine
   the number of registers saved below the local storage.  */

HOST_WIDE_INT
ix86_compute_frame_size (size, nregs_on_stack)
     HOST_WIDE_INT size;
     int *nregs_on_stack;
{
  int limit;
  int nregs;
  int regno;
  int padding;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  HOST_WIDE_INT total_size;

  limit = frame_pointer_needed
	  ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM;

  nregs = 0;

  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && ! call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      nregs++;

  padding = 0;
  total_size = size + (nregs * UNITS_PER_WORD);

#ifdef PREFERRED_STACK_BOUNDARY
  {
    int offset;
    int preferred_alignment = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;

    offset = 4;
    if (frame_pointer_needed)
      offset += UNITS_PER_WORD;

    total_size += offset;

    padding = ((total_size + preferred_alignment - 1)
	       & -preferred_alignment) - total_size;

    if (padding < (((offset + preferred_alignment - 1)
		    & -preferred_alignment) - offset))
      padding += preferred_alignment;

    /* Don't bother aligning the stack of a leaf function
       which doesn't allocate any stack slots.  */
    if (size == 0 && current_function_is_leaf)
      padding = 0;
  }
#endif

  if (nregs_on_stack)
    *nregs_on_stack = nregs;

  return size + padding;
}

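/* Worked example (assuming PREFERRED_STACK_BOUNDARY == 128): with
   size == 20 bytes of locals, nregs == 2 saved registers and a frame
   pointer, total_size = 20 + 8 = 28, offset = 4 + 4 = 8 for the
   return address and saved %ebp, so total_size becomes 36; rounding
   up to 48 gives padding = 12, and 20 + 12 = 32 bytes are allocated,
   keeping %esp 16 byte aligned.  */
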
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue ()
{
  register int regno;
  int limit;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *)0);
  rtx insn;

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (tsize == 0)
    ;
  else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
    {
      if (frame_pointer_needed)
	insn = emit_insn (gen_prologue_allocate_stack (stack_pointer_rtx,
						       stack_pointer_rtx,
						       GEN_INT (-tsize),
						       frame_pointer_rtx));
      else
	insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				      GEN_INT (-tsize)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (tsize));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx));

      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }

  limit = (frame_pointer_needed ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && ! call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    load_pic_register ();

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  However, if -fpic, the above call will have
     done that.  */
  if ((profile_flag || profile_block_flag) && ! pic_reg_used)
    emit_insn (gen_blockage ());
}

/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue ()
{
  register int regno;
  register int limit;
  int nregs;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs);

  /* SP is often unreliable so we may have to go off the frame pointer.  */

  offset = -(tsize + nregs * UNITS_PER_WORD);

  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.  Otherwise,
     restore sp (if necessary) and pop the registers.  */

  limit = (frame_pointer_needed
	   ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);

  if (nregs > 1 || sp_valid)
    {
      if ( !sp_valid )
	{
	  rtx addr_offset;
	  addr_offset = adj_offsettable_operand (AT_BP (QImode), offset);
	  addr_offset = XEXP (addr_offset, 0);

	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, addr_offset));
	}

      for (regno = 0; regno < limit; regno++)
	if ((regs_ever_live[regno] && ! call_used_regs[regno])
	    || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
	  {
	    emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
	  }
    }
  else
    {
      for (regno = 0; regno < limit; regno++)
	if ((regs_ever_live[regno] && ! call_used_regs[regno])
	    || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
	  {
	    emit_move_insn (gen_rtx_REG (SImode, regno),
			    adj_offsettable_operand (AT_BP (Pmode), offset));
	    offset += 4;
	  }
    }

  if (frame_pointer_needed)
    {
      /* If not an i386, mov & pop is faster than "leave".  */
      if (TARGET_USE_LEAVE)
	emit_insn (gen_leave ());
      else
	{
	  emit_insn (gen_epilogue_deallocate_stack (stack_pointer_rtx,
						    frame_pointer_rtx));
	  emit_insn (gen_popsi1 (frame_pointer_rtx));
	}
    }
  else if (tsize)
    {
      /* Intel's docs say that for 4 or 8 bytes of stack frame one should
	 use `pop' and not `add'.  */
      int use_pop = tsize == 4;
      rtx edx = 0, ecx;

      /* Use two pops only for the Pentium processors.  */
      if (tsize == 8 && !TARGET_386 && !TARGET_486)
	{
	  rtx retval = current_function_return_rtx;

	  edx = gen_rtx_REG (SImode, 1);

	  /* This case is a bit more complex.  Since we cannot pop into
	     %ecx twice we need a second register.  But this is only
	     available if the return value is not of DImode in which
	     case the %edx register is not available.  */
	  use_pop = (retval == NULL
		     || ! reg_overlap_mentioned_p (edx, retval));
	}

      if (use_pop)
	{
	  ecx = gen_rtx_REG (SImode, 2);

	  /* We have to prevent the two pops here from being scheduled.
	     GCC otherwise would try in some situation to put other
	     instructions in between them which has a bad effect.  */
	  emit_insn (gen_blockage ());
	  emit_insn (gen_popsi1 (ecx));
	  if (tsize == 8)
	    emit_insn (gen_popsi1 (edx));
	}
      else
	{
	  /* If there is no frame pointer, we must still release the frame.  */
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (tsize)));
	}
    }

#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    {
      FUNCTION_BLOCK_PROFILER_EXIT;
    }
#endif

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 32K bytes (maybe 64K?  Is it signed?).  If
	 asked to pop more, pop return address, do explicit add, and jump
	 indirectly to the caller.  */

      if (current_function_pops_args >= 32768)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_indirect_jump (ecx);
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}

/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return false if the structure of the address is
   grossly off.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;			/* index*scale + base */
	  else
	    disp = op1;			/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return FALSE;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return FALSE;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return FALSE;
      scale = 1 << scale;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return FALSE;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is not scaling */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if (base == frame_pointer_rtx && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return TRUE;
}

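/* Example decompositions (RTL for illustration):

     (plus (mult (reg %esi) (const_int 4)) (reg %ebp))
	-> base %ebp, index %esi, scale 4, disp 0 (%ebp needs a disp)

     (ashift (reg %eax) (const_int 2))
	-> index %eax, scale 4, disp 0 (the lea shift form; scaling
	   alone cannot be encoded without a base or displacement)

     (mult (reg %eax) (const_int 2))
	-> base %eax, index %eax, scale 1 (reg+reg beats reg*2)  */
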
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (disp)
     register rtx disp;
{
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
    }

  if (GET_CODE (disp) != UNSPEC
      || XVECLEN (disp, 0) != 1)
    return 0;

  /* Must be @GOT or @GOTOFF.  */
  if (XINT (disp, 1) != 6
      && XINT (disp, 1) != 7)
    return 0;

  if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
      && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
    return 0;

  return 1;
}

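/* Examples (illustration): the following displacements are accepted,

     (const (unspec [(symbol_ref "x")] 6))
     (const (plus (unspec [(symbol_ref "x")] 7) (const_int 4)))

   i.e. x@GOT and x@GOTOFF+4, while a bare (symbol_ref "x"), or an
   unspec with any other number, is rejected.  */
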
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (! ix86_decompose_address (addr, &parts))
    {
      reason = "decomposition failed";
      goto error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
	{
	  reason = "base is not a register";
	  goto error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
	{
	  reason = "base is not valid";
	  goto error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
	{
	  reason = "index is not a register";
	  goto error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
	{
	  reason = "index is not valid";
	  goto error;
	}
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (!CONSTANT_ADDRESS_P (disp))
	{
	  reason = "displacement is not constant";
	  goto error;
	}

      if (GET_CODE (disp) == CONST_DOUBLE)
	{
	  reason = "displacement is a const_double";
	  goto error;
	}

      if (flag_pic && SYMBOLIC_CONST (disp))
	{
	  if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto error;
	    }

	  /* Verify that a symbolic pic displacement includes
	     the pic_offset_table_rtx register.  */
	  if (base != pic_offset_table_rtx
	      && (index != pic_offset_table_rtx || scale != 1))
	    {
	      reason = "pic displacement against invalid base";
	      goto error;
	    }
	}
      else if (HALF_PIC_P ())
	{
	  if (! HALF_PIC_ADDRESS_P (disp)
	      || (base != NULL_RTX || index != NULL_RTX))
	    {
	      reason = "displacement is an invalid half-pic reference";
	      goto error;
	    }
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

  if (GET_CODE (addr) == LABEL_REF
      || (GET_CODE (addr) == SYMBOL_REF
	  && (CONSTANT_POOL_ADDRESS_P (addr)
	      || SYMBOL_REF_FLAG (addr))))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 7);
      new = gen_rtx_CONST (VOIDmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      /* This symbol must be referenced via a load from the
	 Global Offset Table (@GOT).  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 6);
      new = gen_rtx_CONST (VOIDmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
      new = gen_rtx_MEM (Pmode, new);
      RTX_UNCHANGING_P (new) = 1;

      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      emit_move_insn (reg, new);
      new = reg;
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == UNSPEC)
	    {
	      /* Check that the unspec is one of the ones we generate?  */
	    }
	  else if (GET_CODE (addr) != PLUS)
	    abort ();
	}

      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if ((GET_CODE (op0) == LABEL_REF
	       || (GET_CODE (op0) == SYMBOL_REF
		   && (CONSTANT_POOL_ADDRESS_P (op0)
		       || SYMBOL_REF_FLAG (op0))))
	      && GET_CODE (op1) == CONST_INT)
	    {
	      current_function_uses_pic_offset_table = 1;
	      new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, op0), 7);
	      new = gen_rtx_PLUS (VOIDmode, new, op1);
	      new = gen_rtx_CONST (VOIDmode, new);
	      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

	      if (reg != 0)
		{
		  emit_move_insn (reg, new);
		  new = reg;
		}
	    }
	  else
	    {
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new = legitimize_pic_address (XEXP (addr, 1),
					    base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }

  return new;
}

2181 /* Try machine-dependent ways of modifying an illegitimate address
2182 to be legitimate. If we find one, return the new, valid address.
2183 This macro is used in only one place: `memory_address' in explow.c.
2185 OLDX is the address as it was before break_out_memory_refs was called.
2186 In some cases it is useful to look at this to decide what needs to be done.
2188 MODE and WIN are passed so that this macro can use
2189 GO_IF_LEGITIMATE_ADDRESS.
2191 It is always safe for this macro to do nothing. It exists to recognize
2192 opportunities to optimize the output.
2194 For the 80386, we handle X+REG by loading X into a register R and
2195 using R+REG. R will go in a general reg and indexing will be used.
2196 However, if REG is a broken-out memory address or multiplication,
2197 nothing needs to be done because REG can certainly go in a general reg.
2199 When -fpic is used, special handling is needed for symbolic references.
2200 See comments by legitimize_pic_address in i386.c for details. */
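/* For example, the ASHIFT canonicalization below rewrites
   (plus (ashift (reg) (const_int 2)) (reg)) as
   (plus (mult (reg) (const_int 4)) (reg)), which matches the
   base + index*scale addressing mode directly.  */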
2202 rtx
2203 legitimize_address (x, oldx, mode)
2204 register rtx x;
2205 register rtx oldx ATTRIBUTE_UNUSED;
2206 enum machine_mode mode;
2208 int changed = 0;
2209 unsigned log;
2211 if (TARGET_DEBUG_ADDR)
2213 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2214 GET_MODE_NAME (mode));
2215 debug_rtx (x);
2218 if (flag_pic && SYMBOLIC_CONST (x))
2219 return legitimize_pic_address (x, 0);
2221 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2222 if (GET_CODE (x) == ASHIFT
2223 && GET_CODE (XEXP (x, 1)) == CONST_INT
2224 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
2226 changed = 1;
2227 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
2228 GEN_INT (1 << log));
2231 if (GET_CODE (x) == PLUS)
2233 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2235 if (GET_CODE (XEXP (x, 0)) == ASHIFT
2236 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2237 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
2239 changed = 1;
2240 XEXP (x, 0) = gen_rtx_MULT (Pmode,
2241 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
2242 GEN_INT (1 << log));
2245 if (GET_CODE (XEXP (x, 1)) == ASHIFT
2246 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
2247 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
2249 changed = 1;
2250 XEXP (x, 1) = gen_rtx_MULT (Pmode,
2251 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
2252 GEN_INT (1 << log));
2255 /* Put multiply first if it isn't already. */
2256 if (GET_CODE (XEXP (x, 1)) == MULT)
2258 rtx tmp = XEXP (x, 0);
2259 XEXP (x, 0) = XEXP (x, 1);
2260 XEXP (x, 1) = tmp;
2261 changed = 1;
2264 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2265 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2266 created by virtual register instantiation, register elimination, and
2267 similar optimizations. */
2268 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
2270 changed = 1;
2271 x = gen_rtx_PLUS (Pmode,
2272 gen_rtx_PLUS (Pmode, XEXP (x, 0),
2273 XEXP (XEXP (x, 1), 0)),
2274 XEXP (XEXP (x, 1), 1));
2277 /* Canonicalize
2278 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
2279 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2280 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
2281 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
2282 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
2283 && CONSTANT_P (XEXP (x, 1)))
2285 rtx constant;
2286 rtx other = NULL_RTX;
2288 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2290 constant = XEXP (x, 1);
2291 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
2293 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
2295 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
2296 other = XEXP (x, 1);
2298 else
2299 constant = 0;
2301 if (constant)
2303 changed = 1;
2304 x = gen_rtx_PLUS (Pmode,
2305 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
2306 XEXP (XEXP (XEXP (x, 0), 1), 0)),
2307 plus_constant (other, INTVAL (constant)));
2311 if (changed && legitimate_address_p (mode, x, FALSE))
2312 return x;
2314 if (GET_CODE (XEXP (x, 0)) == MULT)
2316 changed = 1;
2317 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
2320 if (GET_CODE (XEXP (x, 1)) == MULT)
2322 changed = 1;
2323 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
2326 if (changed
2327 && GET_CODE (XEXP (x, 1)) == REG
2328 && GET_CODE (XEXP (x, 0)) == REG)
2329 return x;
2331 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
2333 changed = 1;
2334 x = legitimize_pic_address (x, 0);
2337 if (changed && legitimate_address_p (mode, x, FALSE))
2338 return x;
2340 if (GET_CODE (XEXP (x, 0)) == REG)
2342 register rtx temp = gen_reg_rtx (Pmode);
2343 register rtx val = force_operand (XEXP (x, 1), temp);
2344 if (val != temp)
2345 emit_move_insn (temp, val);
2347 XEXP (x, 1) = temp;
2348 return x;
2351 else if (GET_CODE (XEXP (x, 1)) == REG)
2353 register rtx temp = gen_reg_rtx (Pmode);
2354 register rtx val = force_operand (XEXP (x, 0), temp);
2355 if (val != temp)
2356 emit_move_insn (temp, val);
2358 XEXP (x, 0) = temp;
2359 return x;
2363 return x;
2366 /* Print an integer constant expression in assembler syntax. Addition
2367 and subtraction are the only arithmetic that may appear in these
2368 expressions. FILE is the stdio stream to write to, X is the rtx, and
2369 CODE is the operand print code from the output string. */
2371 static void
2372 output_pic_addr_const (file, x, code)
2373 FILE *file;
2374 rtx x;
2375 int code;
2377 char buf[256];
2379 switch (GET_CODE (x))
2381 case PC:
2382 if (flag_pic)
2383 putc ('.', file);
2384 else
2385 abort ();
2386 break;
2388 case SYMBOL_REF:
2389 assemble_name (file, XSTR (x, 0));
2390 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
2391 fputs ("@PLT", file);
2392 break;
2394 case LABEL_REF:
2395 x = XEXP (x, 0);
2396 /* FALLTHRU */
2397 case CODE_LABEL:
2398 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
2399 assemble_name (asm_out_file, buf);
2400 break;
2402 case CONST_INT:
2403 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2404 break;
2406 case CONST:
2407 /* This used to output parentheses around the expression,
2408 but that does not work on the 386 (either ATT or BSD assembler). */
2409 output_pic_addr_const (file, XEXP (x, 0), code);
2410 break;
2412 case CONST_DOUBLE:
2413 if (GET_MODE (x) == VOIDmode)
2415 /* We can use %d if the number is <32 bits and positive. */
2416 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
2417 fprintf (file, "0x%lx%08lx",
2418 (unsigned long) CONST_DOUBLE_HIGH (x),
2419 (unsigned long) CONST_DOUBLE_LOW (x));
2420 else
2421 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2423 else
2424 /* We can't handle floating point constants;
2425 PRINT_OPERAND must handle them. */
2426 output_operand_lossage ("floating constant misused");
2427 break;
2429 case PLUS:
2430 /* Some assemblers need integer constants to appear first. */
2431 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
2433 output_pic_addr_const (file, XEXP (x, 0), code);
2434 putc ('+', file);
2435 output_pic_addr_const (file, XEXP (x, 1), code);
2437 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2439 output_pic_addr_const (file, XEXP (x, 1), code);
2440 putc ('+', file);
2441 output_pic_addr_const (file, XEXP (x, 0), code);
2443 else
2444 abort ();
2445 break;
2447 case MINUS:
2448 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
2449 output_pic_addr_const (file, XEXP (x, 0), code);
2450 putc ('-', file);
2451 output_pic_addr_const (file, XEXP (x, 1), code);
2452 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
2453 break;
2455 case UNSPEC:
2456 if (XVECLEN (x, 0) != 1)
2457 abort ();
2458 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
2459 switch (XINT (x, 1))
2461 case 6:
2462 fputs ("@GOT", file);
2463 break;
2464 case 7:
2465 fputs ("@GOTOFF", file);
2466 break;
2467 case 8:
2468 fputs ("@PLT", file);
2469 break;
2470 default:
2471 output_operand_lossage ("invalid UNSPEC as operand");
2472 break;
2474 break;
2476 default:
2477 output_operand_lossage ("invalid expression as operand");
2481 static void
2482 put_condition_code (code, mode, reverse, fp, file)
2483 enum rtx_code code;
2484 enum machine_mode mode;
2485 int reverse, fp;
2486 FILE *file;
2488 const char *suffix;
2490 if (reverse)
2491 code = reverse_condition (code);
2493 switch (code)
2495 case EQ:
2496 suffix = "e";
2497 break;
2498 case NE:
2499 suffix = "ne";
2500 break;
2501 case GT:
2502 if (mode == CCNOmode)
2503 abort ();
2504 suffix = "g";
2505 break;
2506 case GTU:
2507 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
2508 Those same assemblers have the same but opposite lossage on cmov. */
2509 suffix = fp ? "nbe" : "a";
2510 break;
2511 case LT:
2512 if (mode == CCNOmode)
2513 suffix = "s";
2514 else
2515 suffix = "l";
2516 break;
2517 case LTU:
2518 suffix = "b";
2519 break;
2520 case GE:
2521 if (mode == CCNOmode)
2522 suffix = "ns";
2523 else
2524 suffix = "ge";
2525 break;
2526 case GEU:
2527 /* ??? As above. */
2528 suffix = fp ? "nb" : "ae";
2529 break;
2530 case LE:
2531 if (mode == CCNOmode)
2532 abort ();
2533 suffix = "le";
2534 break;
2535 case LEU:
2536 suffix = "be";
2537 break;
2538 default:
2539 abort ();
2541 fputs (suffix, file);
2544 void
2545 print_reg (x, code, file)
2546 rtx x;
2547 int code;
2548 FILE *file;
2550 if (REGNO (x) == ARG_POINTER_REGNUM
2551 || REGNO (x) == FLAGS_REG
2552 || REGNO (x) == FPSR_REG)
2553 abort ();
2555 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
2556 putc ('%', file);
2558 if (code == 'w')
2559 code = 2;
2560 else if (code == 'b')
2561 code = 1;
2562 else if (code == 'k')
2563 code = 4;
2564 else if (code == 'y')
2565 code = 3;
2566 else if (code == 'h')
2567 code = 0;
2568 else
2569 code = GET_MODE_SIZE (GET_MODE (x));
2571 switch (code)
2573 case 3:
2574 if (STACK_TOP_P (x))
2576 fputs ("st(0)", file);
2577 break;
2579 /* FALLTHRU */
2580 case 4:
2581 case 8:
2582 case 12:
2583 if (! FP_REG_P (x))
2584 putc ('e', file);
2585 /* FALLTHRU */
2586 case 2:
2587 fputs (hi_reg_name[REGNO (x)], file);
2588 break;
2589 case 1:
2590 fputs (qi_reg_name[REGNO (x)], file);
2591 break;
2592 case 0:
2593 fputs (qi_high_reg_name[REGNO (x)], file);
2594 break;
2595 default:
2596 abort ();
2600 /* Meaning of CODE:
2601 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
2602 C -- print opcode suffix for set/cmov insn.
2603 c -- like C, but print reversed condition
2604 R -- print the prefix for register names.
2605 z -- print the opcode suffix for the size of the current operand.
2606 * -- print a star (in certain assembler syntax)
2607 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2608 s -- print a shift double count, followed by the assembler's argument
2609 delimiter.
2610 b -- print the QImode name of the register for the indicated operand.
2611 %b0 would print %al if operands[0] is reg 0.
2612 w -- likewise, print the HImode name of the register.
2613 k -- likewise, print the SImode name of the register.
2614 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
2615 y -- print "st(0)" instead of "st" as a register. */
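/* For example, if operands[0] is register 0 (eax), %b0 prints `al',
   %w0 prints `ax', %k0 prints `eax' and %h0 prints `ah', each with
   a `%' prefix in AT&T syntax.  */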
2617 void
2618 print_operand (file, x, code)
2619 FILE *file;
2620 rtx x;
2621 int code;
2623 if (code)
2625 switch (code)
2627 case '*':
2628 if (ASSEMBLER_DIALECT == 0)
2629 putc ('*', file);
2630 return;
2632 case 'L':
2633 if (ASSEMBLER_DIALECT == 0)
2634 putc ('l', file);
2635 return;
2637 case 'W':
2638 if (ASSEMBLER_DIALECT == 0)
2639 putc ('w', file);
2640 return;
2642 case 'B':
2643 if (ASSEMBLER_DIALECT == 0)
2644 putc ('b', file);
2645 return;
2647 case 'Q':
2648 if (ASSEMBLER_DIALECT == 0)
2649 putc ('l', file);
2650 return;
2652 case 'S':
2653 if (ASSEMBLER_DIALECT == 0)
2654 putc ('s', file);
2655 return;
2657 case 'T':
2658 if (ASSEMBLER_DIALECT == 0)
2659 putc ('t', file);
2660 return;
2662 case 'z':
2663 /* 387 opcodes don't get size suffixes if the operands are
2664 registers. */
2666 if (STACK_REG_P (x))
2667 return;
2669 /* Intel syntax has no truck with instruction suffixes. */
2670 if (ASSEMBLER_DIALECT != 0)
2671 return;
2673 /* Derive the size suffix from the mode of the operand. */
2674 switch (GET_MODE_SIZE (GET_MODE (x)))
2676 case 1:
2677 putc ('b', file);
2678 return;
2680 case 2:
2681 putc ('w', file);
2682 return;
2684 case 4:
2685 if (GET_MODE (x) == SFmode)
2687 putc ('s', file);
2688 return;
2690 else
2691 putc ('l', file);
2692 return;
2694 case 12:
2695 putc ('t', file);
2696 return;
2698 case 8:
2699 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
2701 #ifdef GAS_MNEMONICS
2702 putc ('q', file);
2703 #else
2704 putc ('l', file);
2705 putc ('l', file);
2706 #endif
2708 else
2709 putc ('l', file);
2710 return;
2713 case 'b':
2714 case 'w':
2715 case 'k':
2716 case 'h':
2717 case 'y':
2718 case 'X':
2719 case 'P':
2720 break;
2722 case 's':
2723 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
2725 PRINT_OPERAND (file, x, 0);
2726 putc (',', file);
2728 return;
2730 case 'C':
2731 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
2732 return;
2733 case 'F':
2734 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
2735 return;
2737 /* Like above, but reverse condition */
2738 case 'c':
2739 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
2740 return;
2741 case 'f':
2742 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
2743 return;
2745 default:
2747 char str[50];
2748 sprintf (str, "invalid operand code `%c'", code);
2749 output_operand_lossage (str);
2754 if (GET_CODE (x) == REG)
2756 PRINT_REG (x, code, file);
2759 else if (GET_CODE (x) == MEM)
2761 /* No `byte ptr' prefix for call instructions. */
2762 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
2764 char * size;
2765 switch (GET_MODE_SIZE (GET_MODE (x)))
2767 case 1: size = "BYTE"; break;
2768 case 2: size = "WORD"; break;
2769 case 4: size = "DWORD"; break;
2770 case 8: size = "QWORD"; break;
2771 case 12: size = "XWORD"; break;
2772 default:
2773 abort();
2775 fputs (size, file);
2776 fputs (" PTR ", file);
2779 x = XEXP (x, 0);
2780 if (flag_pic && CONSTANT_ADDRESS_P (x))
2781 output_pic_addr_const (file, x, code);
2782 else
2783 output_address (x);
2786 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
2788 REAL_VALUE_TYPE r;
2789 long l;
2791 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
2792 REAL_VALUE_TO_TARGET_SINGLE (r, l);
2794 if (ASSEMBLER_DIALECT == 0)
2795 putc ('$', file);
2796 fprintf (file, "0x%lx", l);
2799 /* These float cases don't actually occur as immediate operands. */
2800 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
2802 REAL_VALUE_TYPE r;
2803 char dstr[30];
2805 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
2806 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
2807 fprintf (file, "%s", dstr);
2810 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
2812 REAL_VALUE_TYPE r;
2813 char dstr[30];
2815 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
2816 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
2817 fprintf (file, "%s", dstr);
2819 else
2821 if (code != 'P')
2823 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
2825 if (ASSEMBLER_DIALECT == 0)
2826 putc ('$', file);
2828 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
2829 || GET_CODE (x) == LABEL_REF)
2831 if (ASSEMBLER_DIALECT == 0)
2832 putc ('$', file);
2833 else
2834 fputs ("OFFSET FLAT:", file);
2837 if (GET_CODE (x) == CONST_INT)
2838 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2839 else if (flag_pic)
2840 output_pic_addr_const (file, x, code);
2841 else
2842 output_addr_const (file, x);
2846 /* Print a memory operand whose address is ADDR. */
2848 void
2849 print_operand_address (file, addr)
2850 FILE *file;
2851 register rtx addr;
2853 struct ix86_address parts;
2854 rtx base, index, disp;
2855 int scale;
2857 if (! ix86_decompose_address (addr, &parts))
2858 abort ();
2860 base = parts.base;
2861 index = parts.index;
2862 disp = parts.disp;
2863 scale = parts.scale;
2865 if (!base && !index)
2867 /* A displacement-only address requires special attention. */
2869 if (GET_CODE (disp) == CONST_INT)
2871 if (ASSEMBLER_DIALECT != 0)
2872 fputs ("ds:", file);
2873 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
2875 else if (flag_pic)
2876 output_pic_addr_const (file, addr, 0);
2877 else
2878 output_addr_const (file, addr);
2880 else
2882 if (ASSEMBLER_DIALECT == 0)
2884 if (disp)
2886 if (flag_pic)
2887 output_pic_addr_const (file, disp, 0);
2888 else if (GET_CODE (disp) == LABEL_REF)
2889 output_asm_label (disp);
2890 else
2891 output_addr_const (file, disp);
2894 putc ('(', file);
2895 if (base)
2896 PRINT_REG (base, 0, file);
2897 if (index)
2899 putc (',', file);
2900 PRINT_REG (index, 0, file);
2901 if (scale != 1)
2902 fprintf (file, ",%d", scale);
2904 putc (')', file);
2906 else
2908 rtx offset = NULL_RTX;
2910 if (disp)
2912 /* Pull out the offset of a symbol; print any symbol itself. */
2913 if (GET_CODE (disp) == CONST
2914 && GET_CODE (XEXP (disp, 0)) == PLUS
2915 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2917 offset = XEXP (XEXP (disp, 0), 1);
2918 disp = gen_rtx_CONST (VOIDmode,
2919 XEXP (XEXP (disp, 0), 0));
2922 if (flag_pic)
2923 output_pic_addr_const (file, disp, 0);
2924 else if (GET_CODE (disp) == LABEL_REF)
2925 output_asm_label (disp);
2926 else if (GET_CODE (disp) == CONST_INT)
2927 offset = disp;
2928 else
2929 output_addr_const (file, disp);
2932 putc ('[', file);
2933 if (base)
2935 PRINT_REG (base, 0, file);
2936 if (offset)
2938 if (INTVAL (offset) >= 0)
2939 putc ('+', file);
2940 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2943 else if (offset)
2944 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2945 else
2946 putc ('0', file);
2948 if (index)
2950 putc ('+', file);
2951 PRINT_REG (index, 0, file);
2952 if (scale != 1)
2953 fprintf (file, "*%d", scale);
2955 putc (']', file);
2960 /* Split one or more DImode RTL references into pairs of SImode
2961 references. The RTL can be REG, offsettable MEM, integer constant, or
2962 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
2963 split and "num" is its length. lo_half and hi_half are output arrays
2964 that parallel "operands". */
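/* For example, a DImode value in registers splits into (reg:SI n)
   and (reg:SI n+1), while an offsettable MEM splits into the same
   MEM at offsets 0 and 4.  */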
2966 void
2967 split_di (operands, num, lo_half, hi_half)
2968 rtx operands[];
2969 int num;
2970 rtx lo_half[], hi_half[];
2972 while (num--)
2974 rtx op = operands[num];
2975 if (CONSTANT_P (op))
2976 split_double (op, &lo_half[num], &hi_half[num]);
2977 else if (! reload_completed)
2979 lo_half[num] = gen_lowpart (SImode, op);
2980 hi_half[num] = gen_highpart (SImode, op);
2982 else if (GET_CODE (op) == REG)
2984 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
2985 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
2987 else if (offsettable_memref_p (op))
2989 rtx lo_addr = XEXP (op, 0);
2990 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
2991 lo_half[num] = change_address (op, SImode, lo_addr);
2992 hi_half[num] = change_address (op, SImode, hi_addr);
2994 else
2995 abort();
2999 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
3000 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3001 is the expression of the binary operation. The output may either be
3002 emitted here, or returned to the caller, like all output_* functions.
3004 There is no guarantee that the operands are the same mode, as they
3005 might be within FLOAT or FLOAT_EXTEND expressions. */
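/* For example, (plus:DF (reg) (mem)) comes out as something like
   `fadd%z2 %2'; the code below picks the mnemonic from the operator,
   then appends the suffix and operands according to the operand kinds
   and to which register holds the stack top.  */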
3007 char *
3008 output_387_binary_op (insn, operands)
3009 rtx insn;
3010 rtx *operands;
3012 static char buf[100];
3013 rtx temp;
3014 char *p;
3016 switch (GET_CODE (operands[3]))
3018 case PLUS:
3019 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3020 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3021 p = "fiadd";
3022 else
3023 p = "fadd";
3024 break;
3026 case MINUS:
3027 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3028 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3029 p = "fisub";
3030 else
3031 p = "fsub";
3032 break;
3034 case MULT:
3035 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3036 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3037 p = "fimul";
3038 else
3039 p = "fmul";
3040 break;
3042 case DIV:
3043 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3044 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3045 p = "fidiv";
3046 else
3047 p = "fdiv";
3048 break;
3050 default:
3051 abort ();
3054 strcpy (buf, p);
3056 switch (GET_CODE (operands[3]))
3058 case MULT:
3059 case PLUS:
3060 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
3062 temp = operands[2];
3063 operands[2] = operands[1];
3064 operands[1] = temp;
3067 if (GET_CODE (operands[2]) == MEM)
3069 p = "%z2\t%2";
3070 break;
3073 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3075 if (STACK_TOP_P (operands[0]))
3076 p = "p\t{%0,%2|%2, %0}";
3077 else
3078 p = "p\t{%2,%0|%0, %2}";
3079 break;
3082 if (STACK_TOP_P (operands[0]))
3083 p = "\t{%y2,%0|%0, %y2}";
3084 else
3085 p = "\t{%2,%0|%0, %2}";
3086 break;
3088 case MINUS:
3089 case DIV:
3090 if (GET_CODE (operands[1]) == MEM)
3092 p = "r%z1\t%1";
3093 break;
3096 if (GET_CODE (operands[2]) == MEM)
3098 p = "%z2\t%2";
3099 break;
3102 if (! STACK_REG_P (operands[1]) || ! STACK_REG_P (operands[2]))
3103 abort ();
3105 /* Note that the Unixware assembler, and the AT&T assembler before
3106 that, are confusingly not reversed from Intel syntax in this
3107 area. */
3108 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3110 if (STACK_TOP_P (operands[0]))
3111 p = "p\t%0,%2";
3112 else
3113 p = "rp\t%2,%0";
3114 break;
3117 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
3119 if (STACK_TOP_P (operands[0]))
3120 p = "rp\t%0,%1";
3121 else
3122 p = "p\t%1,%0";
3123 break;
3126 if (STACK_TOP_P (operands[0]))
3128 if (STACK_TOP_P (operands[1]))
3129 p = "\t%y2,%0";
3130 else
3131 p = "r\t%y1,%0";
3132 break;
3134 else if (STACK_TOP_P (operands[1]))
3135 p = "\t%1,%0";
3136 else
3137 p = "r\t%2,%0";
3138 break;
3140 default:
3141 abort ();
3144 strcat (buf, p);
3145 return buf;
3148 /* Output code for INSN to convert a float to a signed int. OPERANDS
3149 are the insn operands. The output may be [SD]Imode and the input
3150 operand may be [SDX]Fmode. */
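/* The fist/fistp family rounds according to the 387 control word, so
   the code below saves the control word, stores 0x0c into its high
   byte to select round-toward-zero, performs the conversion, and then
   restores the original control word.  */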
3152 char *
3153 output_fix_trunc (insn, operands)
3154 rtx insn;
3155 rtx *operands;
3157 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3158 int dimode_p = GET_MODE (operands[0]) == DImode;
3159 rtx xops[4];
3161 /* Jump through a hoop or two for DImode, since the hardware has no
3162 non-popping instruction. We used to do this a different way, but
3163 that was somewhat fragile and broke with post-reload splitters. */
3164 if (dimode_p && !stack_top_dies)
3165 output_asm_insn ("fld\t%y1", operands);
3167 if (! STACK_TOP_P (operands[1]))
3168 abort ();
3170 xops[0] = GEN_INT (12);
3171 xops[1] = adj_offsettable_operand (operands[2], 1);
3172 xops[1] = change_address (xops[1], QImode, NULL_RTX);
3174 xops[2] = operands[0];
3175 if (GET_CODE (operands[0]) != MEM)
3176 xops[2] = operands[3];
3178 output_asm_insn ("fnstcw\t%2", operands);
3179 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
3180 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
3181 output_asm_insn ("fldcw\t%2", operands);
3182 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
3184 if (stack_top_dies || dimode_p)
3185 output_asm_insn ("fistp%z2\t%2", xops);
3186 else
3187 output_asm_insn ("fist%z2\t%2", xops);
3189 output_asm_insn ("fldcw\t%2", operands);
3191 if (GET_CODE (operands[0]) != MEM)
3193 if (dimode_p)
3195 split_di (operands+0, 1, xops+0, xops+1);
3196 split_di (operands+3, 1, xops+2, xops+3);
3197 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3198 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
3200 else
3201 output_asm_insn ("mov{l}\t{%3,%0|%0, %3}", operands);
3204 return "";
3207 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
3208 should be used and 2 when fnstsw should be used. UNORDERED_P is true
3209 when fucom should be used. */
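/* For example, with EFLAGS_P == 1 a register-register compare becomes
   `fcomi' (or `fcomip' when the stack top dies), leaving the result
   directly in the flags; with EFLAGS_P == 2 the result is moved into
   the flags user via `fnstsw'.  */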
3211 char *
3212 output_fp_compare (insn, operands, eflags_p, unordered_p)
3213 rtx insn;
3214 rtx *operands;
3215 int eflags_p, unordered_p;
3217 int stack_top_dies;
3218 rtx cmp_op0 = operands[0];
3219 rtx cmp_op1 = operands[1];
3221 if (eflags_p == 2)
3223 cmp_op0 = cmp_op1;
3224 cmp_op1 = operands[2];
3227 if (! STACK_TOP_P (cmp_op0))
3228 abort ();
3230 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3232 if (STACK_REG_P (cmp_op1)
3233 && stack_top_dies
3234 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
3235 && REGNO (cmp_op1) != FIRST_STACK_REG)
3237 /* If the top of the 387 stack dies, and the other operand
3238 is also a stack register that dies, then this must be a
3239 `fcompp' float compare.  */
3241 if (eflags_p == 1)
3243 /* There is no double popping fcomi variant. Fortunately,
3244 eflags is immune from the fstp's cc clobbering. */
3245 if (unordered_p)
3246 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
3247 else
3248 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
3249 return "fstp\t%y0";
3251 else
3253 if (eflags_p == 2)
3255 if (unordered_p)
3256 return "fucompp\n\tfnstsw\t%0";
3257 else
3258 return "fcompp\n\tfnstsw\t%0";
3260 else
3262 if (unordered_p)
3263 return "fucompp";
3264 else
3265 return "fcompp";
3269 else
3271 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
3273 static char * const alt[24] =
3275 "fcom%z1\t%y1",
3276 "fcomp%z1\t%y1",
3277 "fucom%z1\t%y1",
3278 "fucomp%z1\t%y1",
3280 "ficom%z1\t%y1",
3281 "ficomp%z1\t%y1",
3282 NULL,
3283 NULL,
3285 "fcomi\t{%y1, %0|%0, %y1}",
3286 "fcomip\t{%y1, %0|%0, %y1}",
3287 "fucomi\t{%y1, %0|%0, %y1}",
3288 "fucomip\t{%y1, %0|%0, %y1}",
3290 NULL,
3291 NULL,
3292 NULL,
3293 NULL,
3295 "fcom%z2\t%y2\n\tfnstsw\t%0",
3296 "fcomp%z2\t%y2\n\tfnstsw\t%0",
3297 "fucom%z2\t%y2\n\tfnstsw\t%0",
3298 "fucomp%z2\t%y2\n\tfnstsw\t%0",
3300 "ficom%z2\t%y2\n\tfnstsw\t%0",
3301 "ficomp%z2\t%y2\n\tfnstsw\t%0",
3302 NULL,
3303 NULL
3306 int mask;
3307 char *ret;
3309 mask = eflags_p << 3;
3310 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
3311 mask |= unordered_p << 1;
3312 mask |= stack_top_dies;
3314 if (mask >= 24)
3315 abort ();
3316 ret = alt[mask];
3317 if (ret == NULL)
3318 abort ();
3320 return ret;
3324 /* Output assembler code to FILE to initialize basic-block profiling.
3326 If profile_block_flag == 2
3328 Output code to call the subroutine `__bb_init_trace_func'
3329 and pass two parameters to it. The first parameter is
3330 the address of a block allocated in the object module.
3331 The second parameter is the number of the first basic block
3332 of the function.
3334 The name of the block is a local symbol made with this statement:
3336 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
3338 Of course, since you are writing the definition of
3339 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3340 can take a short cut in the definition of this macro and use the
3341 name that you know will result.
3343 The number of the first basic block of the function is
3344 passed to the macro in BLOCK_OR_LABEL.
3346 If described in a virtual assembler language the code to be
3347 output looks like:
3349 parameter1 <- LPBX0
3350 parameter2 <- BLOCK_OR_LABEL
3351 call __bb_init_trace_func
3353 else if profile_block_flag != 0
3355 Output code to call the subroutine `__bb_init_func'
3356 and pass one single parameter to it, which is the same
3357 as the first parameter to `__bb_init_trace_func'.
3359 The first word of this parameter is a flag which will be nonzero if
3360 the object module has already been initialized. So test this word
3361 first, and do not call `__bb_init_func' if the flag is nonzero.
3362 Note: When profile_block_flag == 2 the test need not be done
3363 but `__bb_init_trace_func' *must* be called.
3365 BLOCK_OR_LABEL may be used to generate a label number as a
3366 branch destination in case `__bb_init_func' will not be called.
3368 If described in a virtual assembler language the code to be
3369 output looks like:
3371 cmp (LPBX0),0
3372 jne local_label
3373 parameter1 <- LPBX0
3374 call __bb_init_func
3375 local_label:
3378 void
3379 ix86_output_function_block_profiler (file, block_or_label)
3380 FILE *file;
3381 int block_or_label;
3383 static int num_func = 0;
3384 rtx xops[8];
3385 char block_table[80], false_label[80];
3387 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
3389 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
3390 xops[5] = stack_pointer_rtx;
3391 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
3393 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
3395 switch (profile_block_flag)
3397 case 2:
3398 xops[2] = GEN_INT (block_or_label);
3399 xops[3] = gen_rtx_MEM (Pmode,
3400 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
3401 xops[6] = GEN_INT (8);
3403 output_asm_insn ("push{l}\t%2", xops);
3404 if (!flag_pic)
3405 output_asm_insn ("push{l}\t%1", xops);
3406 else
3408 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
3409 output_asm_insn ("push{l}\t%7", xops);
3411 output_asm_insn ("call\t%P3", xops);
3412 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
3413 break;
3415 default:
3416 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
3418 xops[0] = const0_rtx;
3419 xops[2] = gen_rtx_MEM (Pmode,
3420 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
3421 xops[3] = gen_rtx_MEM (Pmode,
3422 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
3423 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
3424 xops[6] = GEN_INT (4);
3426 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
3428 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
3429 output_asm_insn ("jne\t%2", xops);
3431 if (!flag_pic)
3432 output_asm_insn ("push{l}\t%1", xops);
3433 else
3435 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
3436 output_asm_insn ("push{l}\t%7", xops);
3438 output_asm_insn ("call\t%P3", xops);
3439 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
3440 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
3441 num_func++;
3442 break;
3446 /* Output assembler code to FILE to increment a counter associated
3447 with basic block number BLOCKNO.
3449 If profile_block_flag == 2
3451 Output code to initialize the global structure `__bb' and
3452 call the function `__bb_trace_func' which will increment the
3453 counter.
3455 `__bb' consists of two words. In the first word the number
3456 of the basic block has to be stored. In the second word
3457 the address of a block allocated in the object module
3458 has to be stored.
3460 The basic block number is given by BLOCKNO.
3462 The address of the block is given by the label created with
3464 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
3466 by FUNCTION_BLOCK_PROFILER.
3468 Of course, since you are writing the definition of
3469 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3470 can take a short cut in the definition of this macro and use the
3471 name that you know will result.
3473 If described in a virtual assembler language the code to be
3474 output looks like:
3476 move BLOCKNO -> (__bb)
3477 move LPBX0 -> (__bb+4)
3478 call __bb_trace_func
3480 Note that function `__bb_trace_func' must not change the
3481 machine state, especially the flag register. To grant
3482 this, you must output code to save and restore registers
3483 either in this macro or in the macros MACHINE_STATE_SAVE
3484 and MACHINE_STATE_RESTORE. The last two macros will be
3485 used in the function `__bb_trace_func', so you must make
3486 sure that the function prologue does not change any
3487 register prior to saving it with MACHINE_STATE_SAVE.
3489 else if profile_block_flag != 0
3491 Output code to increment the counter directly.
3492 Basic blocks are numbered separately from zero within each
3493 compiled object module. The count associated with block number
3494 BLOCKNO is at index BLOCKNO in an array of words; the name of
3495 this array is a local symbol made with this statement:
3497 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
3499 Of course, since you are writing the definition of
3500 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3501 can take a short cut in the definition of this macro and use the
3502 name that you know will result.
3504 If described in a virtual assembler language the code to be
3505 output looks like:
3507 inc (LPBX2+4*BLOCKNO)
3510 void
3511 ix86_output_block_profiler (file, blockno)
3512 FILE *file ATTRIBUTE_UNUSED;
3513 int blockno;
3515 rtx xops[8], cnt_rtx;
3516 char counts[80];
3517 char *block_table = counts;
3519 switch (profile_block_flag)
3521 case 2:
3522 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
3524 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
3525 xops[2] = GEN_INT (blockno);
3526 xops[3] = gen_rtx_MEM (Pmode,
3527 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
3528 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
3529 xops[5] = plus_constant (xops[4], 4);
3530 xops[0] = gen_rtx_MEM (SImode, xops[4]);
3531 xops[6] = gen_rtx_MEM (SImode, xops[5]);
3533 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
3535 output_asm_insn ("pushf", xops);
3536 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3537 if (flag_pic)
3539 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
3540 output_asm_insn ("push{l}\t%7", xops);
3541 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
3542 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
3543 output_asm_insn ("pop{l}\t%7", xops);
3545 else
3546 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
3547 output_asm_insn ("call\t%P3", xops);
3548 output_asm_insn ("popf", xops);
3550 break;
3552 default:
3553 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
3554 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
3555 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
3557 if (blockno)
3558 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
3560 if (flag_pic)
3561 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
3563 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
3564 output_asm_insn ("inc{l}\t%0", xops);
3566 break;
3570 void
3571 ix86_expand_move (mode, operands)
3572 enum machine_mode mode;
3573 rtx operands[];
3575 int strict = (reload_in_progress || reload_completed);
3576 int want_clobber = 0;
3577 rtx insn;
3579 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
3581 /* Emit insns to move operands[1] into operands[0]. */
3583 if (GET_CODE (operands[0]) == MEM)
3584 operands[1] = force_reg (Pmode, operands[1]);
3585 else
3587 rtx temp = operands[0];
3588 if (GET_CODE (temp) != REG)
3589 temp = gen_reg_rtx (Pmode);
3590 temp = legitimize_pic_address (operands[1], temp);
3591 if (temp == operands[0])
3592 return;
3593 operands[1] = temp;
3596 else
3598 if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
3599 operands[1] = force_reg (mode, operands[1]);
3601 if (FLOAT_MODE_P (mode))
3603 /* If we are loading a floating point constant that isn't 0 or 1
3604 into a register, force the value to memory now, since we'll
3605 get better code out the back end. */
3607 if (strict)
3609 else if (GET_CODE (operands[0]) == MEM)
3610 operands[1] = force_reg (mode, operands[1]);
3611 else if (GET_CODE (operands[1]) == CONST_DOUBLE
3612 && ! standard_80387_constant_p (operands[1]))
3613 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
3615 else
3617 /* Try to guess when a cc clobber on the move might be fruitful. */
3618 if (!strict
3619 && GET_CODE (operands[0]) == REG
3620 && operands[1] == const0_rtx
3621 && !flag_peephole2)
3622 want_clobber = 1;
3626 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
3627 if (want_clobber)
3629 rtx clob;
3630 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
3631 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, insn, clob));
3634 emit_insn (insn);
3637 /* Attempt to expand a binary operator. Make the expansion closer to the
3638 actual machine, rather than just to general_operand, which would allow
3639 3 separate memory references (one output, two input) in a single insn. */
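/* For example, for `*a = *b + *c' the code below forces one source
   out of memory, and since the destination matches neither source it
   computes the sum into a register and stores it afterward.  */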
3642 void
3643 ix86_expand_binary_operator (code, mode, operands)
3644 enum rtx_code code;
3645 enum machine_mode mode;
3646 rtx operands[];
3648 int matching_memory;
3649 rtx src1, src2, dst, op, clob;
3651 dst = operands[0];
3652 src1 = operands[1];
3653 src2 = operands[2];
3655 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
3656 if (GET_RTX_CLASS (code) == 'c'
3657 && (rtx_equal_p (dst, src2)
3658 || immediate_operand (src1, mode)))
3660 rtx temp = src1;
3661 src1 = src2;
3662 src2 = temp;
3665 /* If the destination is memory, and we do not have matching source
3666 operands, do things in registers. */
3667 matching_memory = 0;
3668 if (GET_CODE (dst) == MEM)
3670 if (rtx_equal_p (dst, src1))
3671 matching_memory = 1;
3672 else if (GET_RTX_CLASS (code) == 'c'
3673 && rtx_equal_p (dst, src2))
3674 matching_memory = 2;
3675 else
3676 dst = gen_reg_rtx (mode);
3679 /* Both source operands cannot be in memory. */
3680 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
3682 if (matching_memory != 2)
3683 src2 = force_reg (mode, src2);
3684 else
3685 src1 = force_reg (mode, src1);
3688 /* If the operation is not commutable, source 1 cannot be a constant. */
3689 if (CONSTANT_P (src1) && GET_RTX_CLASS (code) != 'c')
3690 src1 = force_reg (mode, src1);
3692 /* If optimizing, copy to regs to improve CSE */
3693 if (optimize && !reload_in_progress && !reload_completed)
3695 if (GET_CODE (dst) == MEM)
3696 dst = gen_reg_rtx (mode);
3697 if (GET_CODE (src1) == MEM)
3698 src1 = force_reg (mode, src1);
3699 if (GET_CODE (src2) == MEM)
3700 src2 = force_reg (mode, src2);
3703 /* Emit the instruction. */
3705 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
3706 if (reload_in_progress)
3708 /* Reload doesn't know about the flags register, and doesn't know that
3709 it doesn't want to clobber it. We can only do this with PLUS. */
3710 if (code != PLUS)
3711 abort ();
3712 emit_insn (op);
3714 else
3716 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
3717 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
3720 /* Fix up the destination if needed. */
3721 if (dst != operands[0])
3722 emit_move_insn (operands[0], dst);
3725 /* Return TRUE or FALSE depending on whether the binary operator meets the
3726 appropriate constraints. */
3728 int
3729 ix86_binary_operator_ok (code, mode, operands)
3730 enum rtx_code code;
3731 enum machine_mode mode ATTRIBUTE_UNUSED;
3732 rtx operands[3];
3734 /* Both source operands cannot be in memory. */
3735 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
3736 return 0;
3737 /* If the operation is not commutable, source 1 cannot be a constant. */
3738 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
3739 return 0;
3740 /* If the destination is memory, we must have a matching source operand. */
3741 if (GET_CODE (operands[0]) == MEM
3742 && ! (rtx_equal_p (operands[0], operands[1])
3743 || (GET_RTX_CLASS (code) == 'c'
3744 && rtx_equal_p (operands[0], operands[2]))))
3745 return 0;
3746 return 1;
3749 /* Attempt to expand a unary operator. Make the expansion closer to the
3750 actual machine, rather than just to general_operand, which would allow
3751 2 separate memory references (one output, one input) in a single insn.
3752 Return TRUE if the expansion succeeds, FALSE otherwise. */
3754 int
3755 ix86_expand_unary_operator (code, mode, operands)
3756 enum rtx_code code;
3757 enum machine_mode mode;
3758 rtx operands[];
3760 /* If optimizing, copy to regs to improve CSE */
3761 if (optimize
3762 && ((reload_in_progress | reload_completed) == 0)
3763 && GET_CODE (operands[1]) == MEM)
3764 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3766 if (! ix86_unary_operator_ok (code, mode, operands))
3768 if (optimize == 0
3769 && ((reload_in_progress | reload_completed) == 0)
3770 && GET_CODE (operands[1]) == MEM)
3772 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3773 if (! ix86_unary_operator_ok (code, mode, operands))
3774 return FALSE;
3776 else
3777 return FALSE;
3780 return TRUE;
3783 /* Return TRUE or FALSE depending on whether the unary operator meets the
3784 appropriate constraints. */
3786 int
3787 ix86_unary_operator_ok (code, mode, operands)
3788 enum rtx_code code ATTRIBUTE_UNUSED;
3789 enum machine_mode mode ATTRIBUTE_UNUSED;
3790 rtx operands[2] ATTRIBUTE_UNUSED;
3792 return TRUE;
3795 /* Produce an unsigned comparison for a given signed comparison. */
3797 static enum rtx_code
3798 unsigned_comparison (code)
3799 enum rtx_code code;
3801 switch (code)
3803 case GT:
3804 code = GTU;
3805 break;
3806 case LT:
3807 code = LTU;
3808 break;
3809 case GE:
3810 code = GEU;
3811 break;
3812 case LE:
3813 code = LEU;
3814 break;
3815 case EQ:
3816 case NE:
3817 case LEU:
3818 case LTU:
3819 case GEU:
3820 case GTU:
3821 break;
3822 default:
3823 abort ();
3825 return code;
3828 /* Generate insn patterns to do an integer compare of OPERANDS. */
3830 static rtx
3831 ix86_expand_int_compare (code, op0, op1)
3832 enum rtx_code code;
3833 rtx op0, op1;
3835 enum machine_mode cmpmode;
3836 rtx tmp, flags;
3838 cmpmode = SELECT_CC_MODE (code, op0, op1);
3839 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
3841 /* This is very simple, but making the interface the same as in the
3842 FP case makes the rest of the code easier. */
3843 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
3844 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
3846 /* Return the test that should be put into the flags user, i.e.
3847 the bcc, scc, or cmov instruction. */
3848 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
3851 /* Generate insn patterns to do a floating point compare of OPERANDS.
3852 If UNORDERED, allow for unordered compares. */
3854 static rtx
3855 ix86_expand_fp_compare (code, op0, op1, unordered)
3856 enum rtx_code code;
3857 rtx op0, op1;
3858 int unordered;
3860 enum machine_mode fpcmp_mode;
3861 enum machine_mode intcmp_mode;
3862 rtx tmp;
3864 /* When not doing IEEE compliant compares, disable unordered. */
3865 if (! TARGET_IEEE_FP)
3866 unordered = 0;
3867 fpcmp_mode = unordered ? CCFPUmode : CCFPmode;
3869 /* ??? If we knew whether invalid-operand exceptions were masked,
3870 we could rely on fcom to raise an exception and take care of
3871 NaNs. But we don't. We could know this from c9x math bits. */
3872 if (TARGET_IEEE_FP)
3873 unordered = 1;
3875 /* All of the unordered compare instructions only work on registers.
3876 The same is true of the XFmode compare instructions. */
3877 if (unordered || GET_MODE (op0) == XFmode)
3879 op0 = force_reg (GET_MODE (op0), op0);
3880 op1 = force_reg (GET_MODE (op1), op1);
3882 else
3884 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
3885 things around if they appear profitable, otherwise force op0
3886 into a register. */
3888 if (standard_80387_constant_p (op0) == 0
3889 || (GET_CODE (op0) == MEM
3890 && ! (standard_80387_constant_p (op1) == 0
3891 || GET_CODE (op1) == MEM)))
3893 rtx tmp;
3894 tmp = op0, op0 = op1, op1 = tmp;
3895 code = swap_condition (code);
3898 if (GET_CODE (op0) != REG)
3899 op0 = force_reg (GET_MODE (op0), op0);
3901 if (CONSTANT_P (op1))
3903 if (standard_80387_constant_p (op1))
3904 op1 = force_reg (GET_MODE (op1), op1);
3905 else
3906 op1 = validize_mem (force_const_mem (GET_MODE (op1), op1));
3910 /* %%% fcomi is probably always faster, even when dealing with memory,
3911 since compare-and-branch would be three insns instead of four. */
3912 if (TARGET_CMOVE && !unordered)
3914 if (GET_CODE (op0) != REG)
3915 op0 = force_reg (GET_MODE (op0), op0);
3916 if (GET_CODE (op1) != REG)
3917 op1 = force_reg (GET_MODE (op1), op1);
3919 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
3920 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
3921 emit_insn (tmp);
3923 /* The FP codes work out to act like unsigned. */
3924 code = unsigned_comparison (code);
3925 intcmp_mode = fpcmp_mode;
3927 else
3929 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
3931 rtx tmp2;
3932 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
3933 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
3934 tmp = gen_reg_rtx (HImode);
3935 emit_insn (gen_rtx_SET (VOIDmode, tmp, tmp2));
3937 if (! unordered)
3939 /* We have two options here -- use sahf, or testing bits of ah
3940 directly. On PPRO, they are equivalent, sahf being one byte
3941 smaller. On Pentium, sahf is non-pairable while test is UV
3942 pairable. */
3944 if (TARGET_USE_SAHF || optimize_size)
3946 do_sahf:
3948 /* The FP codes work out to act like unsigned. */
3949 code = unsigned_comparison (code);
3950 emit_insn (gen_x86_sahf_1 (tmp));
3951 intcmp_mode = CCmode;
3953 else
3956 * The numbers below correspond to the bits of the FPSW in AH.
3957 * C3, C2, and C0 are in bits 0x40, 0x04, and 0x01 respectively.
3959 * cmp C3 C2 C0
3960 * > 0 0 0
3961 * < 0 0 1
3962 * = 1 0 0
3963 * un 1 1 1
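/* For GT, for instance, it suffices to test C0 (mask 0x01) and branch
   on ZF; the cases below choose the mask and the final condition
   accordingly.  */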
3966 int mask;
3968 switch (code)
3970 case GT:
3971 mask = 0x01;
3972 code = EQ;
3973 break;
3974 case LT:
3975 mask = 0x01;
3976 code = NE;
3977 break;
3978 case GE:
3979 /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
3980 faster in all cases to just fall back on sahf. */
3981 goto do_sahf;
3982 case LE:
3983 mask = 0x41;
3984 code = NE;
3985 break;
3986 case EQ:
3987 mask = 0x40;
3988 code = NE;
3989 break;
3990 case NE:
3991 mask = 0x40;
3992 code = EQ;
3993 break;
3994 default:
3995 abort ();
3998 emit_insn (gen_testqi_ext_0 (tmp, GEN_INT (mask)));
3999 intcmp_mode = CCNOmode;
4002 else
4004 /* In the unordered case, we have to check C2 for NaN's, which
4005 doesn't happen to work out to anything nice combination-wise.
4006 So do some bit twiddling on the value we've got in AH to come
4007 up with an appropriate set of condition codes. */
4009 intcmp_mode = CCNOmode;
4010 switch (code)
4012 case GT:
4013 emit_insn (gen_testqi_ext_0 (tmp, GEN_INT (0x45)));
4014 code = EQ;
4015 break;
4016 case LT:
4017 emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
4018 emit_insn (gen_cmpqi_ext_3 (tmp, GEN_INT (0x01)));
4019 intcmp_mode = CCmode;
4020 code = EQ;
4021 break;
4022 case GE:
4023 emit_insn (gen_testqi_ext_0 (tmp, GEN_INT (0x05)));
4024 code = EQ;
4025 break;
4026 case LE:
4027 emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
4028 emit_insn (gen_addqi_ext_1 (tmp, tmp, constm1_rtx));
4029 emit_insn (gen_cmpqi_ext_3 (tmp, GEN_INT (0x40)));
4030 intcmp_mode = CCmode;
4031 code = LTU;
4032 break;
4033 case EQ:
4034 emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
4035 emit_insn (gen_cmpqi_ext_3 (tmp, GEN_INT (0x40)));
4036 intcmp_mode = CCmode;
4037 code = EQ;
4038 break;
4039 case NE:
4040 emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
4041 emit_insn (gen_xorqi_cc_ext_1 (tmp, tmp, GEN_INT (0x40)));
4042 code = NE;
4043 break;
4044 default:
4045 abort ();
4050 /* Return the test that should be put into the flags user, i.e.
4051 the bcc, scc, or cmov instruction. */
4052 return gen_rtx_fmt_ee (code, VOIDmode,
4053 gen_rtx_REG (intcmp_mode, FLAGS_REG),
4054 const0_rtx);
4057 static rtx
4058 ix86_expand_compare (code, unordered)
4059 enum rtx_code code;
4060 int unordered;
4062 rtx op0, op1, ret;
4063 op0 = ix86_compare_op0;
4064 op1 = ix86_compare_op1;
4066 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4067 ret = ix86_expand_fp_compare (code, op0, op1, unordered);
4068 else
4069 ret = ix86_expand_int_compare (code, op0, op1);
4071 return ret;
4074 void
4075 ix86_expand_branch (code, unordered, label)
4076 enum rtx_code code;
4077 int unordered;
4078 rtx label;
4080 rtx tmp, lo[2], hi[2], label2;
4081 enum rtx_code code1, code2, code3;
4083 if (GET_MODE (ix86_compare_op0) != DImode)
4085 tmp = ix86_expand_compare (code, unordered);
4086 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4087 gen_rtx_LABEL_REF (VOIDmode, label),
4088 pc_rtx);
4089 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
4090 return;
4093 /* Expand DImode branch into multiple compare+branch. */
4095 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
4097 tmp = ix86_compare_op0;
4098 ix86_compare_op0 = ix86_compare_op1;
4099 ix86_compare_op1 = tmp;
4100 code = swap_condition (code);
4102 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
4103 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
4105 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to avoid
4106 two branches. This costs one extra insn, so disable when optimizing
4107 for size. */
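/* A DImode `a == b' then comes out as roughly

	xorl hi(b),hi(a)
	xorl lo(b),lo(a)
	orl  lo(a),hi(a)
	je   label

   instead of two compare-and-branch pairs.  */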
4109 if ((code == EQ || code == NE)
4110 && (!optimize_size
4111 || hi[1] == const0_rtx || lo[1] == const0_rtx))
4113 rtx xor0, xor1;
4115 xor1 = hi[0];
4116 if (hi[1] != const0_rtx)
4118 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
4119 NULL_RTX, 0, OPTAB_WIDEN);
4122 xor0 = lo[0];
4123 if (lo[1] != const0_rtx)
4125 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
4126 NULL_RTX, 0, OPTAB_WIDEN);
4129 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
4130 NULL_RTX, 0, OPTAB_WIDEN);
4132 ix86_compare_op0 = tmp;
4133 ix86_compare_op1 = const0_rtx;
4134 ix86_expand_branch (code, unordered, label);
4135 return;
4138 /* Otherwise, if we are doing less-than, op1 is a constant and the
4139 low word is zero, then we can just examine the high word. */
4141 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx
4142 && (code == LT || code == LTU))
4144 ix86_compare_op0 = hi[0];
4145 ix86_compare_op1 = hi[1];
4146 ix86_expand_branch (code, unordered, label);
4147 return;
4150 /* Otherwise, we need two or three jumps. */
4152 label2 = gen_label_rtx ();
4154 code1 = code;
4155 code2 = swap_condition (code);
4156 code3 = unsigned_condition (code);
4158 switch (code)
4160 case LT: case GT: case LTU: case GTU:
4161 break;
4163 case LE: code1 = LT; code2 = GT; break;
4164 case GE: code1 = GT; code2 = LT; break;
4165 case LEU: code1 = LTU; code2 = GTU; break;
4166 case GEU: code1 = GTU; code2 = LTU; break;
4168 case EQ: code1 = NIL; code2 = NE; break;
4169 case NE: code2 = NIL; break;
4171 default:
4172 abort ();
4176 * a < b =>
4177 * if (hi(a) < hi(b)) goto true;
4178 * if (hi(a) > hi(b)) goto false;
4179 * if (lo(a) < lo(b)) goto true;
4180 * false:
4183 ix86_compare_op0 = hi[0];
4184 ix86_compare_op1 = hi[1];
4186 if (code1 != NIL)
4187 ix86_expand_branch (code1, unordered, label);
4188 if (code2 != NIL)
4189 ix86_expand_branch (code2, unordered, label2);
4191 ix86_compare_op0 = lo[0];
4192 ix86_compare_op1 = lo[1];
4193 ix86_expand_branch (code3, unordered, label);
4195 if (code2 != NIL)
4196 emit_label (label2);
4199 int
4200 ix86_expand_setcc (code, unordered, dest)
4201 enum rtx_code code;
4202 int unordered;
4203 rtx dest;
4205 rtx ret, tmp;
4206 int type;
4208 if (GET_MODE (ix86_compare_op0) == DImode)
4209 return 0; /* FAIL */
4211 /* Three modes of generation:
4212 0 -- destination does not overlap compare sources:
4213 clear dest first, emit strict_low_part setcc.
4214 1 -- destination does overlap compare sources:
4215 emit subreg setcc, zero extend.
4216 2 -- destination is in QImode:
4217 emit setcc only.
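/* Mode 1, for example, generates roughly `setcc %al' followed by
   `movzbl %al, %eax', while mode 0 clears the destination first and
   then sets only its low byte.  */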
4220 type = 0;
4221 /* %%% reload problems with in-out. Revisit. */
4222 type = 1;
4224 if (GET_MODE (dest) == QImode)
4225 type = 2;
4226 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
4227 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
4228 type = 1;
4230 if (type == 0)
4231 emit_move_insn (dest, const0_rtx);
4233 ret = ix86_expand_compare (code, unordered);
4234 PUT_MODE (ret, QImode);
4236 tmp = dest;
4237 if (type == 0)
4239 tmp = gen_lowpart (QImode, dest);
4240 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
4242 else if (type == 1)
4244 if (!cse_not_expected)
4245 tmp = gen_reg_rtx (QImode);
4246 else
4247 tmp = gen_lowpart (QImode, dest);
4250 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
4252 if (type == 1)
4254 rtx clob;
4256 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
4257 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
4258 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4259 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
4260 emit_insn (tmp);
4263 return 1; /* DONE */
4266 int
4267 ix86_expand_int_movcc (operands)
4268 rtx operands[];
4270 enum rtx_code code = GET_CODE (operands[1]), compare_code;
4271 rtx compare_seq, compare_op;
4273 /* When the compare code is not LTU or GEU, we cannot use the sbbl idiom.
4274 If the comparison is done against an immediate, we can convert it to LTU
4275 or GEU by adjusting the constant. */
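/* For example, an unsigned `x <= 5' is the same test as `x < 6'.
   The 0xffffffff case is excluded because adding 1 to the constant
   would wrap around.  */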
4277 if ((code == LEU || code == GTU)
4278 && GET_CODE (ix86_compare_op1) == CONST_INT
4279 && GET_MODE (operands[0]) != HImode
4280 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
4281 && GET_CODE (operands[2]) == CONST_INT
4282 && GET_CODE (operands[3]) == CONST_INT)
4284 if (code == LEU)
4285 code = LTU;
4286 else
4287 code = GEU;
4288 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
4290 start_sequence ();
4291 compare_op = ix86_expand_compare (code, code == EQ || code == NE);
4292 compare_seq = gen_sequence ();
4293 end_sequence ();
4295 compare_code = GET_CODE (compare_op);
4297 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
4298 HImode insns, we'd be swallowed in word prefix ops. */
4300 if (GET_MODE (operands[0]) != HImode
4301 && GET_CODE (operands[2]) == CONST_INT
4302 && GET_CODE (operands[3]) == CONST_INT)
4304 rtx out = operands[0];
4305 HOST_WIDE_INT ct = INTVAL (operands[2]);
4306 HOST_WIDE_INT cf = INTVAL (operands[3]);
4307 HOST_WIDE_INT diff;
4309 if (compare_code == LTU || compare_code == GEU)
4312 /* Detect overlap between destination and compare sources. */
4313 rtx tmp = out;
4315 /* To simplify the rest of the code, restrict to the GEU case. */
4316 if (compare_code == LTU)
4318 int tmp = ct;
4319 ct = cf;
4320 cf = tmp;
4321 compare_code = reverse_condition (compare_code);
4322 code = reverse_condition (code);
4324 diff = ct - cf;
4326 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
4327 || reg_overlap_mentioned_p (out, ix86_compare_op1))
4328 tmp = gen_reg_rtx (SImode);
4330 emit_insn (compare_seq);
4331 emit_insn (gen_x86_movsicc_0_m1 (tmp));
4333 if (diff == 1)
4336 * cmpl op0,op1
4337 * sbbl dest,dest
4338 * [addl dest, ct]
4340 * Size 5 - 8.
4342 if (ct)
4343 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
4345 else if (cf == -1)
4348 * cmpl op0,op1
4349 * sbbl dest,dest
4350 * orl $ct, dest
4352 * Size 8.
4354 emit_insn (gen_iorsi3 (tmp, tmp, GEN_INT (ct)));
4356 else if (diff == -1 && ct)
4359 * cmpl op0,op1
4360 * sbbl dest,dest
4361 * xorl $-1, dest
4362 * [addl dest, cf]
4364 * Size 8 - 11.
4366 emit_insn (gen_one_cmplsi2 (tmp, tmp));
4367 if (cf)
4368 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (cf)));
4370 else
4373 * cmpl op0,op1
4374 * sbbl dest,dest
4375 * andl cf - ct, dest
4376 * [addl dest, ct]
4378 * Size 8 - 11.
4380 emit_insn (gen_andsi3 (tmp, tmp, GEN_INT (cf - ct)));
4381 if (ct)
4382 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
4385 if (tmp != out)
4386 emit_move_insn (out, tmp);
4388 return 1; /* DONE */
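/*
 * Worked example (hypothetical values): for the unsigned selection
 * (op0 >= op1 ? 6 : 2), ct = 6, cf = 2, diff = 4, giving roughly
 *
 *   cmpl op1,op0     ; carry set iff op0 < op1
 *   sbbl dest,dest   ; dest = (op0 < op1) ? -1 : 0
 *   andl $-4,dest    ; cf - ct = -4
 *   addl $6,dest     ; 2 if op0 < op1, else 6
 */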
4391 diff = ct - cf;
4392 if (diff < 0)
4394 HOST_WIDE_INT tmp;
4395 tmp = ct, ct = cf, cf = tmp;
4396 diff = -diff;
4397 compare_code = reverse_condition (compare_code);
4398 code = reverse_condition (code);
4400 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
4401 || diff == 3 || diff == 5 || diff == 9)
4404 * xorl dest,dest
4405 * cmpl op1,op2
4406 * setcc dest
4407 * lea cf(dest*(ct-cf)),dest
4409 * Size 14.
4411 * This also catches the degenerate setcc-only case.
4414 rtx tmp;
4415 int nops;
4417 out = emit_store_flag (out, code, ix86_compare_op0,
4418 ix86_compare_op1, VOIDmode, 0, 1);
4420 nops = 0;
4421 if (diff == 1)
4422 tmp = out;
4423 else
4425 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
4426 nops++;
4427 if (diff & 1)
4429 tmp = gen_rtx_PLUS (SImode, tmp, out);
4430 nops++;
4433 if (cf != 0)
4435 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
4436 nops++;
4438 if (tmp != out)
4440 if (nops == 0)
4441 emit_move_insn (out, tmp);
4442 else if (nops == 1)
4444 rtx clob;
4446 clob = gen_rtx_REG (CCmode, FLAGS_REG);
4447 clob = gen_rtx_CLOBBER (VOIDmode, clob);
4449 tmp = gen_rtx_SET (VOIDmode, out, tmp);
4450 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
4451 emit_insn (tmp);
4453 else
4454 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
4456 if (out != operands[0])
4457 emit_move_insn (operands[0], out);
4459 return 1; /* DONE */
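/*
 * Worked instance of the lea decomposition above (hypothetical
 * registers): for (x == y ? 5 : 1), ct = 5, cf = 1, diff = 4:
 *
 *   xorl %eax,%eax ; cmpl y,x ; sete %al ; leal 1(,%eax,4),%eax
 *
 * For the odd diffs 3, 5 and 9 the MULT uses scale diff-1 and the
 * PLUS adds the register back in, matching lea's base + index*scale
 * addressing form.
 */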
4463 * General case:                  Jumpful:
4464 *   xorl dest,dest               cmpl op1, op2
4465 *   cmpl op1, op2                movl ct, dest
4466 *   setcc dest                   jcc 1f
4467 *   decl dest                    movl cf, dest
4468 *   andl (cf-ct),dest           1:
4469 *   addl ct,dest
4471 * Size 20.                       Size 14.
4473 * This is reasonably steep, but branch mispredict costs are
4474 * high on modern cpus, so consider failing only if optimizing
4475 * for space.
4477 * %%% Parameterize branch_cost on the tuning architecture, then
4478 * use that. The 80386 couldn't care less about mispredicts.
4481 if (!optimize_size && !TARGET_CMOVE)
4483 if (ct == 0)
4485 ct = cf;
4486 cf = 0;
4487 compare_code = reverse_condition (compare_code);
4488 code = reverse_condition (code);
4491 out = emit_store_flag (out, code, ix86_compare_op0,
4492 ix86_compare_op1, VOIDmode, 0, 1);
4494 emit_insn (gen_addsi3 (out, out, constm1_rtx));
4495 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
4496 if (ct != 0)
4497 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
4498 if (out != operands[0])
4499 emit_move_insn (operands[0], out);
4501 return 1; /* DONE */
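/*
 * E.g. for the signed selection (x < y ? 10 : 20): ct = 10, cf = 20,
 * and the sequence is roughly
 *
 *   cmpl y,x ; setl %al ; movzbl %al,%eax   (from emit_store_flag)
 *   decl %eax                               ; 0 if true, -1 if false
 *   andl $10,%eax                           ; cf - ct = 10
 *   addl $10,%eax                           ; 10 if true, 20 if false
 */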
4505 if (!TARGET_CMOVE)
4507 /* Try a few things more with specific constants and a variable. */
4509 optab op = NULL;
4510 rtx var, orig_out, out, tmp;
4512 if (optimize_size)
4513 return 0; /* FAIL */
4515 /* If one of the two operands is an interesting constant, load a
4516 constant with the above and mask it in with a logical operation. */
4518 if (GET_CODE (operands[2]) == CONST_INT)
4520 var = operands[3];
4521 if (INTVAL (operands[2]) == 0)
4522 operands[3] = constm1_rtx, op = and_optab;
4523 else if (INTVAL (operands[2]) == -1)
4524 operands[3] = const0_rtx, op = ior_optab;
4526 else if (GET_CODE (operands[3]) == CONST_INT)
4528 var = operands[2];
4529 if (INTVAL (operands[3]) == 0)
4530 operands[2] = constm1_rtx, op = and_optab;
4531 else if (INTVAL (operands[3]) == -1)
4532 operands[2] = const0_rtx, op = ior_optab;
4535 if (op == NULL)
4536 return 0; /* FAIL */
4538 orig_out = operands[0];
4539 tmp = gen_reg_rtx (GET_MODE (orig_out));
4540 operands[0] = tmp;
4542 /* Recurse to get the constant loaded. */
4543 if (ix86_expand_int_movcc (operands) == 0)
4544 return 0; /* FAIL */
4546 /* Mask in the interesting variable. */
4547 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
4548 OPTAB_WIDEN);
4549 if (out != orig_out)
4550 emit_move_insn (orig_out, out);
4552 return 1; /* DONE */
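/* E.g. for (cond ? v : 0) the recursive call computes
   tmp = cond ? -1 : 0, and the AND below yields (-1 & v) = v when the
   condition holds and (0 & v) = 0 when it does not; the IOR case is
   the mirror image with 0 and -1 exchanged. */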
4556 * For comparison with above,
4558 * movl cf,dest
4559 * movl ct,tmp
4560 * cmpl op1,op2
4561 * cmovcc tmp,dest
4563 * Size 15.
4566 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
4567 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
4568 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
4569 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
4571 emit_insn (compare_seq);
4572 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4573 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
4574 compare_op, operands[2],
4575 operands[3])));
4577 return 1; /* DONE */
4581 ix86_expand_fp_movcc (operands)
4582 rtx operands[];
4584 enum rtx_code code;
4585 enum machine_mode mode;
4586 rtx tmp;
4588 /* The floating point conditional move instructions don't directly
4589 support conditions resulting from a signed integer comparison. */
4591 code = GET_CODE (operands[1]);
4592 switch (code)
4594 case LT:
4595 case LE:
4596 case GE:
4597 case GT:
4598 tmp = gen_reg_rtx (QImode);
4599 ix86_expand_setcc (code, 0, tmp);
4600 code = NE;
4601 ix86_compare_op0 = tmp;
4602 ix86_compare_op1 = const0_rtx;
4603 break;
4605 default:
4606 break;
4609 mode = SELECT_CC_MODE (code, ix86_compare_op0, ix86_compare_op1);
4610 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (mode, FLAGS_REG),
4611 gen_rtx_COMPARE (mode,
4612 ix86_compare_op0,
4613 ix86_compare_op1)));
4614 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4615 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
4616 gen_rtx_fmt_ee (code, VOIDmode,
4617 gen_rtx_REG (mode, FLAGS_REG),
4618 const0_rtx),
4619 operands[2],
4620 operands[3])));
4622 return 1;
4626 ix86_split_movdi (operands)
4627 rtx operands[];
4629 split_di (operands+0, 1, operands+2, operands+3);
4630 split_di (operands+1, 1, operands+4, operands+5);
4631 if (reg_overlap_mentioned_p (operands[2], operands[1]))
4633 rtx tmp;
4634 if (!reg_overlap_mentioned_p (operands[3], operands[4]))
4636 tmp = operands[2], operands[2] = operands[3], operands[3] = tmp;
4637 tmp = operands[4], operands[4] = operands[5], operands[5] = tmp;
4639 else
4641 emit_insn (gen_push (operands[4]));
4642 emit_insn (gen_rtx_SET (VOIDmode, operands[3], operands[5]));
4643 emit_insn (gen_popsi1 (operands[2]));
4645 return 1; /* DONE */
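/* When both orderings of the two word moves would clobber the source
   address, the fallback above carries the low word across on the
   stack, roughly:
     pushl src_low ; movl src_high,dst_high ; popl dst_low
   so the low word survives the move that overwrites the address
   register. */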
4649 return 0;
4652 void
4653 ix86_split_ashldi (operands, scratch)
4654 rtx *operands, scratch;
4656 rtx low[2], high[2];
4657 int count;
4659 if (GET_CODE (operands[2]) == CONST_INT)
4661 split_di (operands, 2, low, high);
4662 count = INTVAL (operands[2]) & 63;
4664 if (count >= 32)
4666 emit_move_insn (high[0], low[1]);
4667 emit_move_insn (low[0], const0_rtx);
4669 if (count > 32)
4670 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
4672 else
4674 if (!rtx_equal_p (operands[0], operands[1]))
4675 emit_move_insn (operands[0], operands[1]);
4676 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
4677 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
4680 else
4682 if (!rtx_equal_p (operands[0], operands[1]))
4683 emit_move_insn (operands[0], operands[1]);
4685 split_di (operands, 1, low, high);
4687 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
4688 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
4690 if (TARGET_CMOVE && (! reload_completed || scratch))
4692 if (! reload_completed)
4693 scratch = force_reg (SImode, const0_rtx);
4694 else
4695 emit_move_insn (scratch, const0_rtx);
4697 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
4698 scratch));
4700 else
4701 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
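/* Illustrative output (hypothetical operands): a DImode left shift by
   the constant 40 becomes
     movl low,high ; xorl low,low ; shll $8,high
   while a variable count emits shldl/shll and then one of the
   x86_shift_adj patterns to fix up the result when the runtime count
   turns out to be >= 32. */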
4705 void
4706 ix86_split_ashrdi (operands, scratch)
4707 rtx *operands, scratch;
4709 rtx low[2], high[2];
4710 int count;
4712 if (GET_CODE (operands[2]) == CONST_INT)
4714 split_di (operands, 2, low, high);
4715 count = INTVAL (operands[2]) & 63;
4717 if (count >= 32)
4719 emit_move_insn (low[0], high[1]);
4721 if (! reload_completed)
4722 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
4723 else
4725 emit_move_insn (high[0], low[0]);
4726 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
4729 if (count > 32)
4730 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
4732 else
4734 if (!rtx_equal_p (operands[0], operands[1]))
4735 emit_move_insn (operands[0], operands[1]);
4736 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
4737 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
4740 else
4742 if (!rtx_equal_p (operands[0], operands[1]))
4743 emit_move_insn (operands[0], operands[1]);
4745 split_di (operands, 1, low, high);
4747 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
4748 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
4750 if (TARGET_CMOVE && (!reload_completed || scratch))
4752 if (! reload_completed)
4753 scratch = gen_reg_rtx (SImode);
4754 emit_move_insn (scratch, high[0]);
4755 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
4756 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
4757 scratch));
4759 else
4760 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
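/* E.g. an arithmetic right shift by the constant 35 becomes roughly
     movl high,low ; sarl $31,high ; sarl $3,low
   where the sarl $31 replicates the sign bit through the high word. */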
4764 void
4765 ix86_split_lshrdi (operands, scratch)
4766 rtx *operands, scratch;
4768 rtx low[2], high[2];
4769 int count;
4771 if (GET_CODE (operands[2]) == CONST_INT)
4773 split_di (operands, 2, low, high);
4774 count = INTVAL (operands[2]) & 63;
4776 if (count >= 32)
4778 emit_move_insn (low[0], high[1]);
4779 emit_move_insn (high[0], const0_rtx);
4781 if (count > 32)
4782 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
4784 else
4786 if (!rtx_equal_p (operands[0], operands[1]))
4787 emit_move_insn (operands[0], operands[1]);
4788 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
4789 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
4792 else
4794 if (!rtx_equal_p (operands[0], operands[1]))
4795 emit_move_insn (operands[0], operands[1]);
4797 split_di (operands, 1, low, high);
4799 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
4800 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
4802 /* Heh. By reversing the arguments, we can reuse this pattern. */
4803 if (TARGET_CMOVE && (! reload_completed || scratch))
4805 if (! reload_completed)
4806 scratch = force_reg (SImode, const0_rtx);
4807 else
4808 emit_move_insn (scratch, const0_rtx);
4810 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
4811 scratch));
4813 else
4814 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
4818 /* Expand the appropriate insns for doing strlen if not just doing
4819 repnz; scasb
4821 out = result, initialized with the start address
4822 align_rtx = alignment of the address.
4823 scratch = scratch register, initialized with the start address when
4824 not aligned, otherwise undefined
4826 This is just the body. It needs the initializations mentioned above and
4827 some address computation at the end. These things are done in i386.md. */
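/* Sketch of the emitted body for an unknown alignment: up to three
     cmpb $0,(out) ; je end_0 ; incl out
   steps bring the pointer to a 4-byte boundary; the main loop then does
     movl (out),scratch
   and tests the four bytes of scratch individually, jumping to the
   end_* fixup labels below once a zero byte is found. */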
4829 void
4830 ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
4831 rtx out, align_rtx, scratch;
4833 int align;
4834 rtx tmp;
4835 rtx align_2_label = NULL_RTX;
4836 rtx align_3_label = NULL_RTX;
4837 rtx align_4_label = gen_label_rtx ();
4838 rtx end_0_label = gen_label_rtx ();
4839 rtx end_2_label = gen_label_rtx ();
4840 rtx end_3_label = gen_label_rtx ();
4841 rtx mem;
4842 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
4844 align = 0;
4845 if (GET_CODE (align_rtx) == CONST_INT)
4846 align = INTVAL (align_rtx);
4848 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
4850 /* Is there a known alignment and is it less than 4? */
4851 if (align < 4)
4853 /* Is there a known alignment and is it not 2? */
4854 if (align != 2)
4856 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
4857 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
4859 /* Leave just the two low bits of the address. */
4860 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
4861 NULL_RTX, 0, OPTAB_WIDEN);
4863 emit_insn (gen_cmpsi_0 (align_rtx, const0_rtx));
4865 tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
4866 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4867 gen_rtx_LABEL_REF (VOIDmode,
4868 align_4_label),
4869 pc_rtx);
4870 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
4872 emit_insn (gen_cmpsi_1 (align_rtx, GEN_INT (2)));
4874 tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
4875 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4876 gen_rtx_LABEL_REF (VOIDmode,
4877 align_2_label),
4878 pc_rtx);
4879 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
4881 tmp = gen_rtx_GTU (VOIDmode, flags, const0_rtx);
4882 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4883 gen_rtx_LABEL_REF (VOIDmode,
4884 align_3_label),
4885 pc_rtx);
4886 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
4888 else
4890 /* Since the alignment is 2, we have to check either 2 or 0 bytes;
4891 test whether the address is already 4-byte aligned. */
4893 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
4894 NULL_RTX, 0, OPTAB_WIDEN);
4896 emit_insn (gen_cmpsi_0 (align_rtx, const0_rtx));
4898 tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
4899 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4900 gen_rtx_LABEL_REF (VOIDmode,
4901 align_4_label),
4902 pc_rtx);
4903 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
4906 mem = gen_rtx_MEM (QImode, out);
4908 /* Now compare the bytes. */
4910 /* Compare the leading unaligned bytes, one byte at a time. */
4911 emit_insn (gen_cmpqi_0 (mem, const0_rtx));
4913 tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
4914 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4915 gen_rtx_LABEL_REF (VOIDmode, end_0_label),
4916 pc_rtx);
4917 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
4919 /* Increment the address. */
4920 emit_insn (gen_addsi3 (out, out, const1_rtx));
4922 /* Not needed with an alignment of 2 */
4923 if (align != 2)
4925 emit_label (align_2_label);
4927 emit_insn (gen_cmpqi_0 (mem, const0_rtx));
4929 tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
4930 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4931 gen_rtx_LABEL_REF (VOIDmode,
4932 end_0_label),
4933 pc_rtx);
4934 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
4936 emit_insn (gen_addsi3 (out, out, const1_rtx));
4938 emit_label (align_3_label);
4941 emit_insn (gen_cmpqi_0 (mem, const0_rtx));
4943 tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
4944 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4945 gen_rtx_LABEL_REF (VOIDmode, end_0_label),
4946 pc_rtx);
4947 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
4949 emit_insn (gen_addsi3 (out, out, const1_rtx));
4952 /* Generate a loop that checks 4 bytes at a time. Aligning this loop
4953 is not worthwhile: it only enlarges the code without making it
4954 faster. */
4955 emit_label (align_4_label);
4957 mem = gen_rtx_MEM (SImode, out);
4958 emit_move_insn (scratch, mem);
4960 /* Check first byte. */
4961 emit_insn (gen_cmpqi_0 (gen_lowpart (QImode, scratch), const0_rtx));
4962 tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
4963 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4964 gen_rtx_LABEL_REF (VOIDmode, end_0_label),
4965 pc_rtx);
4966 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
4968 /* Check second byte. */
4969 emit_insn (gen_cmpqi_ext_3 (scratch, const0_rtx));
4970 tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
4971 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4972 gen_rtx_LABEL_REF (VOIDmode, end_3_label),
4973 pc_rtx);
4974 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
4976 /* Check third byte. */
4977 emit_insn (gen_testsi_1 (scratch, GEN_INT (0x00ff0000)));
4978 tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
4979 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4980 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
4981 pc_rtx);
4982 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
4984 /* Increment the address and check the fourth byte. */
4985 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
4986 emit_insn (gen_testsi_1 (scratch, GEN_INT (0xff000000)));
4987 tmp = gen_rtx_NE (VOIDmode, flags, const0_rtx);
4988 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4989 gen_rtx_LABEL_REF (VOIDmode, align_4_label),
4990 pc_rtx);
4991 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
4993 /* Now generate fixups when the compare stops within a 4-byte word. */
4994 emit_insn (gen_subsi3 (out, out, GEN_INT (3)));
4996 emit_label (end_2_label);
4997 emit_insn (gen_addsi3 (out, out, const1_rtx));
4999 emit_label (end_3_label);
5000 emit_insn (gen_addsi3 (out, out, const1_rtx));
5002 emit_label (end_0_label);
5005 /* Clear stack slot assignments remembered from previous functions.
5006 This is called from INIT_EXPANDERS once before RTL is emitted for each
5007 function. */
5009 static void
5010 ix86_init_machine_status (p)
5011 struct function *p;
5013 enum machine_mode mode;
5014 int n;
5015 p->machine
5016 = (struct machine_function *) xmalloc (sizeof (struct machine_function));
5018 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
5019 mode = (enum machine_mode) ((int) mode + 1))
5020 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
5021 ix86_stack_locals[(int) mode][n] = NULL_RTX;
5024 /* Mark machine specific bits of P for GC. */
5025 static void
5026 ix86_mark_machine_status (p)
5027 struct function *p;
5029 enum machine_mode mode;
5030 int n;
5032 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
5033 mode = (enum machine_mode) ((int) mode + 1))
5034 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
5035 ggc_mark_rtx (p->machine->stack_locals[(int) mode][n]);
5038 /* Return a MEM corresponding to a stack slot with mode MODE.
5039 Allocate a new slot if necessary.
5041 The RTL for a function can have several slots available: N is
5042 which slot to use. */
5045 assign_386_stack_local (mode, n)
5046 enum machine_mode mode;
5047 int n;
5049 if (n < 0 || n >= MAX_386_STACK_LOCALS)
5050 abort ();
5052 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
5053 ix86_stack_locals[(int) mode][n]
5054 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
5056 return ix86_stack_locals[(int) mode][n];
5059 /* Calculate the length of the memory address in the instruction
5060 encoding. Does not include the one-byte modrm, opcode, or prefix. */
5062 static int
5063 memory_address_length (addr)
5064 rtx addr;
5066 struct ix86_address parts;
5067 rtx base, index, disp;
5068 int len;
5070 if (GET_CODE (addr) == PRE_DEC
5071 || GET_CODE (addr) == POST_INC)
5072 return 0;
5074 if (! ix86_decompose_address (addr, &parts))
5075 abort ();
5077 base = parts.base;
5078 index = parts.index;
5079 disp = parts.disp;
5080 len = 0;
5082 /* Register Indirect. */
5083 if (base && !index && !disp)
5085 /* Special cases: ebp and esp need the two-byte modrm form. */
5086 if (addr == stack_pointer_rtx
5087 || addr == arg_pointer_rtx
5088 || addr == frame_pointer_rtx)
5089 len = 1;
5092 /* Direct Addressing. */
5093 else if (disp && !base && !index)
5094 len = 4;
5096 else
5098 /* Find the length of the displacement constant. */
5099 if (disp)
5101 if (GET_CODE (disp) == CONST_INT
5102 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
5103 len = 1;
5104 else
5105 len = 4;
5108 /* An index requires the two-byte modrm form. */
5109 if (index)
5110 len += 1;
5113 return len;
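/* Examples (modrm, opcode and prefix bytes excluded, as noted above,
   and taking 'K' as the signed 8-bit displacement range the test
   above implies):
     (reg %eax)                           -> 0
     the frame pointer, e.g. (reg %ebp)   -> 1  (forced disp8 form)
     (plus (reg %ebx) (const_int 8))      -> 1  (disp8)
     (plus (reg %ebx) (const_int 4096))   -> 4  (disp32)
     (symbol_ref ...)                     -> 4  (absolute disp32) */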
5117 ix86_attr_length_default (insn)
5118 rtx insn;
5120 enum attr_type type;
5121 int len = 0, i;
5123 type = get_attr_type (insn);
5124 extract_insn (insn);
5125 switch (type)
5127 case TYPE_INCDEC:
5128 case TYPE_SETCC:
5129 case TYPE_ICMOV:
5130 case TYPE_FMOV:
5131 case TYPE_FOP:
5132 case TYPE_FCMP:
5133 case TYPE_FOP1:
5134 case TYPE_FMUL:
5135 case TYPE_FDIV:
5136 case TYPE_FSGN:
5137 case TYPE_FPSPC:
5138 case TYPE_FCMOV:
5139 case TYPE_IBR:
5140 break;
5142 case TYPE_ALU1:
5143 case TYPE_NEGNOT:
5144 case TYPE_ALU:
5145 case TYPE_ICMP:
5146 case TYPE_IMOVX:
5147 case TYPE_ISHIFT:
5148 case TYPE_IMUL:
5149 case TYPE_IDIV:
5150 case TYPE_PUSH:
5151 case TYPE_POP:
5152 for (i = recog_data.n_operands - 1; i >= 0; --i)
5153 if (CONSTANT_P (recog_data.operand[i]))
5155 if (GET_CODE (recog_data.operand[i]) == CONST_INT
5156 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
5157 len += 1;
5158 else
5159 len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
5161 break;
5163 case TYPE_IMOV:
5164 if (CONSTANT_P (recog_data.operand[1]))
5165 len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
5166 break;
5168 case TYPE_CALL:
5169 if (constant_call_address_operand (recog_data.operand[0],
5170 GET_MODE (recog_data.operand[0])))
5171 return 5;
5172 break;
5174 case TYPE_CALLV:
5175 if (constant_call_address_operand (recog_data.operand[1],
5176 GET_MODE (recog_data.operand[1])))
5177 return 5;
5178 break;
5180 case TYPE_LEA:
5182 /* Irritatingly, single_set doesn't work with REG_UNUSED present,
5183 as we'll get from running life_analysis during reg-stack when
5184 not optimizing. */
5185 rtx set = PATTERN (insn);
5186 if (GET_CODE (set) == SET)
5188 else if (GET_CODE (set) == PARALLEL
5189 && XVECLEN (set, 0) == 2
5190 && GET_CODE (XVECEXP (set, 0, 0)) == SET
5191 && GET_CODE (XVECEXP (set, 0, 1)) == CLOBBER)
5192 set = XVECEXP (set, 0, 0);
5193 else
5194 abort ();
5196 len += memory_address_length (SET_SRC (set));
5197 goto just_opcode;
5200 case TYPE_OTHER:
5201 case TYPE_MULTI:
5202 return 15;
5204 case TYPE_FXCH:
5205 if (STACK_TOP_P (recog_data.operand[0]))
5206 return 2 + (REGNO (recog_data.operand[1]) != FIRST_STACK_REG + 1);
5207 else
5208 return 2 + (REGNO (recog_data.operand[0]) != FIRST_STACK_REG + 1);
5210 default:
5211 abort ();
5214 for (i = recog_data.n_operands - 1; i >= 0; --i)
5215 if (GET_CODE (recog_data.operand[i]) == MEM)
5217 len += memory_address_length (XEXP (recog_data.operand[i], 0));
5218 break;
5221 just_opcode:
5222 len += get_attr_length_opcode (insn);
5223 len += get_attr_length_prefix (insn);
5225 return len;
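/* E.g. "addl $100000,8(%ebx)" is counted roughly as: the opcode and
   modrm bytes from the length_opcode attribute, plus 1 byte of disp8
   from memory_address_length, plus 4 bytes for the SImode immediate. */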
5228 /* Return the maximum number of instructions a cpu can issue. */
5231 ix86_issue_rate ()
5233 switch (ix86_cpu)
5235 case PROCESSOR_PENTIUM:
5236 case PROCESSOR_K6:
5237 return 2;
5239 case PROCESSOR_PENTIUMPRO:
5240 return 3;
5242 default:
5243 return 1;
5247 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
5248 by DEP_INSN and nothing else set by DEP_INSN. */
5250 static int
5251 ix86_flags_dependant (insn, dep_insn, insn_type)
5252 rtx insn, dep_insn;
5253 enum attr_type insn_type;
5255 rtx set, set2;
5257 /* Simplify the test for uninteresting insns. */
5258 if (insn_type != TYPE_SETCC
5259 && insn_type != TYPE_ICMOV
5260 && insn_type != TYPE_FCMOV
5261 && insn_type != TYPE_IBR)
5262 return 0;
5264 if ((set = single_set (dep_insn)) != 0)
5266 set = SET_DEST (set);
5267 set2 = NULL_RTX;
5269 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
5270 && XVECLEN (PATTERN (dep_insn), 0) == 2
5271 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
5272 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
5274 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
5275 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
5278 if (set && GET_CODE (set) == REG && REGNO (set) == FLAGS_REG)
5280 /* This test is true if the dependent insn reads the flags but
5281 not any other potentially set register. */
5282 if (reg_overlap_mentioned_p (set, PATTERN (insn))
5283 && (!set2 || !reg_overlap_mentioned_p (set2, PATTERN (insn))))
5284 return 1;
5287 return 0;
5290 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
5291 address with operands set by DEP_INSN. */
5293 static int
5294 ix86_agi_dependant (insn, dep_insn, insn_type)
5295 rtx insn, dep_insn;
5296 enum attr_type insn_type;
5298 rtx addr;
5300 if (insn_type == TYPE_LEA)
5301 addr = SET_SRC (single_set (insn));
5302 else
5304 int i;
5305 extract_insn (insn);
5306 for (i = recog_data.n_operands - 1; i >= 0; --i)
5307 if (GET_CODE (recog_data.operand[i]) == MEM)
5309 addr = XEXP (recog_data.operand[i], 0);
5310 goto found;
5312 return 0;
5313 found:;
5316 return modified_in_p (addr, dep_insn);
5320 ix86_adjust_cost (insn, link, dep_insn, cost)
5321 rtx insn, link, dep_insn;
5322 int cost;
5324 enum attr_type insn_type, dep_insn_type;
5325 rtx set, set2;
5326 int dep_insn_code_number;
5328 /* We describe no anti- or output dependencies. */
5329 if (REG_NOTE_KIND (link) != 0)
5330 return cost;
5332 dep_insn_code_number = recog_memoized (dep_insn);
5334 /* If we can't recognize the insns, we can't really do anything. */
5335 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
5336 return cost;
5338 /* Prologue and epilogue allocators have a false dependency on ebp.
5339 This results in an extra one-cycle stall when scheduling the Pentium
5340 prologue, so handle this important case manually. */
5342 if ((dep_insn_code_number == CODE_FOR_prologue_allocate_stack
5343 || dep_insn_code_number == CODE_FOR_epilogue_deallocate_stack)
5344 && !reg_mentioned_p (stack_pointer_rtx, insn))
5345 return 0;
5347 insn_type = get_attr_type (insn);
5348 dep_insn_type = get_attr_type (dep_insn);
5350 switch (ix86_cpu)
5352 case PROCESSOR_PENTIUM:
5353 /* Address Generation Interlock adds a cycle of latency. */
5354 if (ix86_agi_dependant (insn, dep_insn, insn_type))
5355 cost += 1;
5357 /* ??? Compares pair with jump/setcc. */
5358 if (ix86_flags_dependant (insn, dep_insn, insn_type))
5359 cost = 0;
5361 /* Floating point stores require the value to be ready one cycle earlier. */
5362 if (insn_type == TYPE_FMOV
5363 && get_attr_memory (insn) == MEMORY_STORE
5364 && !ix86_agi_dependant (insn, dep_insn, insn_type))
5365 cost += 1;
5366 break;
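/* AGI example: "addl $4,%ebx" immediately followed by
   "movl (%ebx),%eax" stalls for a cycle on Pentium, because address
   generation for the load needs the %ebx value computed only in the
   preceding cycle. */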
5368 case PROCESSOR_PENTIUMPRO:
5369 /* Since we can't represent delayed latencies of load+operation,
5370 increase the cost here for non-imov insns. */
5371 if (dep_insn_type != TYPE_IMOV
5372 && dep_insn_type != TYPE_FMOV
5373 && get_attr_memory (dep_insn) == MEMORY_LOAD)
5374 cost += 1;
5376 /* INT->FP conversion is expensive. */
5377 if (get_attr_fp_int_src (dep_insn))
5378 cost += 5;
5380 /* There is one cycle extra latency between an FP op and a store. */
5381 if (insn_type == TYPE_FMOV
5382 && (set = single_set (dep_insn)) != NULL_RTX
5383 && (set2 = single_set (insn)) != NULL_RTX
5384 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
5385 && GET_CODE (SET_DEST (set2)) == MEM)
5386 cost += 1;
5387 break;
5389 case PROCESSOR_K6:
5390 /* The esp dependency is resolved before the instruction is really
5391 finished. */
5392 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
5393 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
5394 return 1;
5396 /* Since we can't represent delayed latencies of load+operation,
5397 increase the cost here for non-imov insns. */
5398 if (get_attr_memory (dep_insn) == MEMORY_LOAD)
5399 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
5401 /* INT->FP conversion is expensive. */
5402 if (get_attr_fp_int_src (dep_insn))
5403 cost += 5;
5404 break;
5406 default:
5407 break;
5410 return cost;
5413 static union
5415 struct ppro_sched_data
5417 rtx decode[3];
5418 int issued_this_cycle;
5419 } ppro;
5420 } ix86_sched_data;
5422 static int
5423 ix86_safe_length (insn)
5424 rtx insn;
5426 if (recog_memoized (insn) >= 0)
5427 return get_attr_length(insn);
5428 else
5429 return 128;
5432 static int
5433 ix86_safe_length_prefix (insn)
5434 rtx insn;
5436 if (recog_memoized (insn) >= 0)
5437 return get_attr_length_prefix (insn);
5438 else
5439 return 0;
5442 static enum attr_memory
5443 ix86_safe_memory (insn)
5444 rtx insn;
5446 if (recog_memoized (insn) >= 0)
5447 return get_attr_memory(insn);
5448 else
5449 return MEMORY_UNKNOWN;
5452 static enum attr_pent_pair
5453 ix86_safe_pent_pair (insn)
5454 rtx insn;
5456 if (recog_memoized (insn) >= 0)
5457 return get_attr_pent_pair(insn);
5458 else
5459 return PENT_PAIR_NP;
5462 static enum attr_ppro_uops
5463 ix86_safe_ppro_uops (insn)
5464 rtx insn;
5466 if (recog_memoized (insn) >= 0)
5467 return get_attr_ppro_uops (insn);
5468 else
5469 return PPRO_UOPS_MANY;
5472 static void
5473 ix86_dump_ppro_packet (dump)
5474 FILE *dump;
5476 if (ix86_sched_data.ppro.decode[0])
5478 fprintf (dump, "PPRO packet: %d",
5479 INSN_UID (ix86_sched_data.ppro.decode[0]));
5480 if (ix86_sched_data.ppro.decode[1])
5481 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
5482 if (ix86_sched_data.ppro.decode[2])
5483 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
5484 fputc ('\n', dump);
5488 /* We're beginning a new block. Initialize data structures as necessary. */
5490 void
5491 ix86_sched_init (dump, sched_verbose)
5492 FILE *dump ATTRIBUTE_UNUSED;
5493 int sched_verbose ATTRIBUTE_UNUSED;
5495 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
5498 /* Shift INSN to SLOT, and shift everything else down. */
5500 static void
5501 ix86_reorder_insn (insnp, slot)
5502 rtx *insnp, *slot;
5504 if (insnp != slot)
5506 rtx insn = *insnp;
5507 do
5508 insnp[0] = insnp[1];
5509 while (++insnp != slot);
5510 *insnp = insn;
5514 /* Find an instruction with the given pairability and the minimal number
5515 of cycles lost to the fact that the CPU waits for both pipelines to
5516 finish before fetching the next instructions. Also make sure that the
5517 two instructions together do not exceed 7 bytes. */
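/* Illustration of the cost metric below: pairing two insns whose
   results are ready after the same number of cycles loses nothing,
   while pairing a 1-cycle insn with a 3-cycle one idles the other
   pipe for two cycles; the read/modify/write penalties are added on
   top of that difference. */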
5519 static rtx *
5520 ix86_pent_find_pair (e_ready, ready, type, first)
5521 rtx *e_ready;
5522 rtx *ready;
5523 enum attr_pent_pair type;
5524 rtx first;
5526 int mincycles, cycles;
5527 enum attr_pent_pair tmp;
5528 enum attr_memory memory;
5529 rtx *insnp, *bestinsnp = NULL;
5531 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
5532 return NULL;
5534 memory = ix86_safe_memory (first);
5535 cycles = result_ready_cost (first);
5536 mincycles = INT_MAX;
5538 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
5539 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
5540 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
5542 enum attr_memory second_memory;
5543 int secondcycles, currentcycles;
5545 second_memory = ix86_safe_memory (*insnp);
5546 secondcycles = result_ready_cost (*insnp);
5547 currentcycles = abs (cycles - secondcycles);
5549 if (secondcycles >= 1 && cycles >= 1)
5551 /* Two read/modify/write instructions together take two
5552 cycles longer. */
5553 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
5554 currentcycles += 2;
5556 /* A read/modify/write instruction followed by a read/modify
5557 instruction takes one cycle longer. */
5558 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
5559 && tmp != PENT_PAIR_UV
5560 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
5561 currentcycles += 1;
5563 if (currentcycles < mincycles)
5564 bestinsnp = insnp, mincycles = currentcycles;
5567 return bestinsnp;
5570 /* We are about to begin issuing insns for this clock cycle.
5571 Override the default sort algorithm to better slot instructions. */
5574 ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
5575 FILE *dump ATTRIBUTE_UNUSED;
5576 int sched_verbose ATTRIBUTE_UNUSED;
5577 rtx *ready;
5578 int n_ready, clock_var;
5580 rtx *e_ready = ready + n_ready - 1;
5581 rtx *insnp;
5582 int i;
5584 if (n_ready < 2)
5585 goto out;
5587 switch (ix86_cpu)
5589 default:
5590 goto out;
5592 case PROCESSOR_PENTIUM:
5593 /* This wouldn't be necessary if Haifa knew that static insn ordering
5594 matters for which pipe an insn is issued to. So we have to make
5595 some minor rearrangements. */
5597 enum attr_pent_pair pair1, pair2;
5599 pair1 = ix86_safe_pent_pair (*e_ready);
5601 /* If the first insn is non-pairable, let it be. */
5602 if (pair1 == PENT_PAIR_NP)
5603 goto out;
5604 pair2 = PENT_PAIR_NP;
5606 /* If the first insn is UV or PV pairable, search for a PU
5607 insn to go with. */
5608 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
5610 insnp = ix86_pent_find_pair (e_ready-1, ready,
5611 PENT_PAIR_PU, *e_ready);
5612 if (insnp)
5613 pair2 = PENT_PAIR_PU;
5616 /* If the first insn is PU or UV pairable, search for a PV
5617 insn to go with. */
5618 if (pair2 == PENT_PAIR_NP
5619 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
5621 insnp = ix86_pent_find_pair (e_ready-1, ready,
5622 PENT_PAIR_PV, *e_ready);
5623 if (insnp)
5624 pair2 = PENT_PAIR_PV;
5627 /* If the first insn is pairable, search for a UV
5628 insn to go with. */
5629 if (pair2 == PENT_PAIR_NP)
5631 insnp = ix86_pent_find_pair (e_ready-1, ready,
5632 PENT_PAIR_UV, *e_ready);
5633 if (insnp)
5634 pair2 = PENT_PAIR_UV;
5637 if (pair2 == PENT_PAIR_NP)
5638 goto out;
5640 /* Found something! Decide if we need to swap the order. */
5641 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
5642 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
5643 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
5644 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
5645 ix86_reorder_insn (insnp, e_ready);
5646 else
5647 ix86_reorder_insn (insnp, e_ready - 1);
5649 break;
5651 case PROCESSOR_PENTIUMPRO:
5653 rtx decode[3];
5654 enum attr_ppro_uops cur_uops;
5655 int issued_this_cycle;
5657 /* At this point .ppro.decode contains the state of the three
5658 decoders from last "cycle". That is, those insns that were
5659 actually independent. But here we're scheduling for the
5660 decoder, and we may find things that are decodable in the
5661 same cycle. */
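/* The PPro decoders follow the well-known 4-1-1 template: decoder 0
   handles an insn of up to four uops, while decoders 1 and 2 accept
   only single-uop insns. Hence the strategy below: pick one "few"-uop
   insn for slot 0, then try to fill slots 1 and 2 with one-uop insns. */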
5663 memcpy (decode, ix86_sched_data.ppro.decode, sizeof(decode));
5664 issued_this_cycle = 0;
5666 insnp = e_ready;
5667 cur_uops = ix86_safe_ppro_uops (*insnp);
5669 /* If the decoders are empty, and we have a complex insn at the
5670 head of the priority queue, let it issue without complaint. */
5671 if (decode[0] == NULL)
5673 if (cur_uops == PPRO_UOPS_MANY)
5675 decode[0] = *insnp;
5676 goto ppro_done;
5679 /* Otherwise, search for a 2-4 uop insn to issue. */
5680 while (cur_uops != PPRO_UOPS_FEW)
5682 if (insnp == ready)
5683 break;
5684 cur_uops = ix86_safe_ppro_uops (*--insnp);
5687 /* If so, move it to the head of the line. */
5688 if (cur_uops == PPRO_UOPS_FEW)
5689 ix86_reorder_insn (insnp, e_ready);
5691 /* Issue the head of the queue. */
5692 issued_this_cycle = 1;
5693 decode[0] = *e_ready--;
5696 /* Look for simple insns to fill in the other two slots. */
5697 for (i = 1; i < 3; ++i)
5698 if (decode[i] == NULL)
5700 if (ready >= e_ready)
5701 goto ppro_done;
5703 insnp = e_ready;
5704 cur_uops = ix86_safe_ppro_uops (*insnp);
5705 while (cur_uops != PPRO_UOPS_ONE)
5707 if (insnp == ready)
5708 break;
5709 cur_uops = ix86_safe_ppro_uops (*--insnp);
5712 /* Found one. Move it to the head of the queue and issue it. */
5713 if (cur_uops == PPRO_UOPS_ONE)
5715 ix86_reorder_insn (insnp, e_ready);
5716 decode[i] = *e_ready--;
5717 issued_this_cycle++;
5718 continue;
5721 /* ??? Didn't find one. Ideally, here we would do a lazy split
5722 of 2-uop insns, issue one and queue the other. */
5725 ppro_done:
5726 if (issued_this_cycle == 0)
5727 issued_this_cycle = 1;
5728 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
5730 break;
5733 out:
5734 return ix86_issue_rate ();
5737 /* We are about to issue INSN. Return the number of insns left on the
5738 ready queue that can be issued this cycle. */
5741 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
5742 FILE *dump;
5743 int sched_verbose;
5744 rtx insn;
5745 int can_issue_more;
5747 int i;
5748 switch (ix86_cpu)
5750 default:
5751 return can_issue_more - 1;
5753 case PROCESSOR_PENTIUMPRO:
5755 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
5757 if (uops == PPRO_UOPS_MANY)
5759 if (sched_verbose)
5760 ix86_dump_ppro_packet (dump);
5761 ix86_sched_data.ppro.decode[0] = insn;
5762 ix86_sched_data.ppro.decode[1] = NULL;
5763 ix86_sched_data.ppro.decode[2] = NULL;
5764 if (sched_verbose)
5765 ix86_dump_ppro_packet (dump);
5766 ix86_sched_data.ppro.decode[0] = NULL;
5768 else if (uops == PPRO_UOPS_FEW)
5770 if (sched_verbose)
5771 ix86_dump_ppro_packet (dump);
5772 ix86_sched_data.ppro.decode[0] = insn;
5773 ix86_sched_data.ppro.decode[1] = NULL;
5774 ix86_sched_data.ppro.decode[2] = NULL;
5776 else
5778 for (i = 0; i < 3; ++i)
5779 if (ix86_sched_data.ppro.decode[i] == NULL)
5781 ix86_sched_data.ppro.decode[i] = insn;
5782 break;
5784 if (i == 3)
5785 abort ();
5786 if (i == 2)
5788 if (sched_verbose)
5789 ix86_dump_ppro_packet (dump);
5790 ix86_sched_data.ppro.decode[0] = NULL;
5791 ix86_sched_data.ppro.decode[1] = NULL;
5792 ix86_sched_data.ppro.decode[2] = NULL;
5796 return --ix86_sched_data.ppro.issued_this_cycle;