1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 92, 94-98, 1999 Free Software Foundation, Inc.
4 This file is part of GNU CC.
6 GNU CC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU CC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU CC; see the file COPYING. If not, write to
18 the Free Software Foundation, 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "insn-flags.h"
34 #include "insn-attr.h"
41 #include "basic-block.h"
44 #ifdef EXTRA_CONSTRAINT
45 /* If EXTRA_CONSTRAINT is defined, then the 'S'
46 constraint in REG_CLASS_FROM_LETTER will no longer work, and various
47 asm statements that need 'S' for class SIREG will break. */
48 error EXTRA_CONSTRAINT conflicts with S constraint letter
49 /* The previous line used to be #error, but some compilers barf
50 even if the conditional was untrue. */
53 #ifndef CHECK_STACK_LIMIT
54 #define CHECK_STACK_LIMIT -1
57 /* Processor costs (relative to an add) */
58 struct processor_costs i386_cost
= { /* 386 specific costs */
59 1, /* cost of an add instruction */
60 1, /* cost of a lea instruction */
61 3, /* variable shift costs */
62 2, /* constant shift costs */
63 6, /* cost of starting a multiply */
64 1, /* cost of multiply per each bit set */
65 23, /* cost of a divide/mod */
69 struct processor_costs i486_cost
= { /* 486 specific costs */
70 1, /* cost of an add instruction */
71 1, /* cost of a lea instruction */
72 3, /* variable shift costs */
73 2, /* constant shift costs */
74 12, /* cost of starting a multiply */
75 1, /* cost of multiply per each bit set */
76 40, /* cost of a divide/mod */
80 struct processor_costs pentium_cost
= {
81 1, /* cost of an add instruction */
82 1, /* cost of a lea instruction */
83 4, /* variable shift costs */
84 1, /* constant shift costs */
85 11, /* cost of starting a multiply */
86 0, /* cost of multiply per each bit set */
87 25, /* cost of a divide/mod */
91 struct processor_costs pentiumpro_cost
= {
92 1, /* cost of an add instruction */
93 1, /* cost of a lea instruction */
94 1, /* variable shift costs */
95 1, /* constant shift costs */
96 1, /* cost of starting a multiply */
97 0, /* cost of multiply per each bit set */
98 17, /* cost of a divide/mod */
102 struct processor_costs k6_cost
= {
103 1, /* cost of an add instruction */
104 2, /* cost of a lea instruction */
105 1, /* variable shift costs */
106 1, /* constant shift costs */
107 3, /* cost of starting a multiply */
108 0, /* cost of multiply per each bit set */
109 18, /* cost of a divide/mod */
/* Cost table for the processor currently being scheduled for.
   Defaults to the Pentium table; override_options resets it from
   processor_target_table once the -mcpu= selection is known.  */
struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  Each m_* macro is a one-bit
   mask keyed by a value of enum processor_type; the x86_* tuning flags
   below are unions of these masks, tested elsewhere against the mask
   for the CPU being scheduled for.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)

/* Per-CPU tuning flags.  The precise effect of each flag is defined at
   its use sites (presumably the TARGET_* test macros in the backend
   headers); here we only record which processors want each behavior.
   A complemented mask (~m_XXX) means "every CPU except XXX".  */
const int x86_use_leave = m_386 | m_K6;
const int x86_push_memory = m_386 | m_K6;
const int x86_zero_extend_with_and = m_486 | m_PENT;
/* Disabled for now; the commented-out mask records the intended set.  */
const int x86_movx = 0 /* m_386 | m_PPRO | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT;
const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
const int x86_use_any_reg = m_486;
const int x86_cmove = m_PPRO;
const int x86_deep_branch = m_PPRO | m_K6;
const int x86_use_sahf = m_PPRO | m_K6;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~m_PPRO;
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
/* Build a MEM rtx of the given MODE addressed through the frame
   pointer.  */
#define AT_BP(mode) (gen_rtx_MEM ((mode), frame_pointer_rtx))

/* Printable hard-register names, indexed by register number.  The
   initializer macros are supplied by the target header (presumably
   i386.h -- confirm); the three arrays give the full-size, QImode-low,
   and QImode-high spellings respectively.  */
const char * const hi_reg_name[] = HI_REGISTER_NAMES;
const char * const qi_reg_name[] = QI_REGISTER_NAMES;
const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
149 /* Array of the smallest class containing reg number REGNO, indexed by
150 REGNO. Used by REGNO_REG_CLASS in i386.h. */
152 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
155 AREG
, DREG
, CREG
, BREG
,
157 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
159 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
160 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  These hold the two operands of
   the most recent compare; NULL_RTX until a compare has been seen.  */
struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;

/* Number of per-function stack slots the backend may allocate for its
   own internal use, per machine mode.  */
#define MAX_386_STACK_LOCALS 2
175 /* Define the structure for the machine field in struct function. */
176 struct machine_function
178 rtx stack_locals
[(int) MAX_MACHINE_MODE
][MAX_386_STACK_LOCALS
];
/* Shorthand for the current function's stack-local scratch slots.  */
#define ix86_stack_locals (current_function->machine->stack_locals)

/* Which cpu we are scheduling for.  */
enum processor_type ix86_cpu;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;	/* for -mcpu=<xxx> */
const char *ix86_arch_string;	/* for -march=<xxx> */

/* Register allocation order, as a string of single-letter register
   codes ('a', 'd', 'c', 'b', 'S', 'D', 'B'); parsed and validated in
   override_options.  */
const char *ix86_reg_alloc_order;
/* Which hard registers appeared in ix86_reg_alloc_order; used to
   reject duplicates and then to append the unmentioned registers in
   order_regs_for_local_alloc.  */
static char regs_allocated[FIRST_PSEUDO_REGISTER];

/* # of registers to use to pass arguments, from -mregparm=.  */
const char *ix86_regparm_string;

/* Alignment to use for loops and jumps: */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Branch cost: values 1-5, see jump.c.  */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
int ix86_align_funcs;
const char *ix86_align_funcs_string;

/* Power of two alignment for loops.  */
int ix86_align_loops;

/* Power of two alignment for non-loop jumps.  */
int ix86_align_jumps;
231 static void output_pic_addr_const
PROTO ((FILE *, rtx
, int));
232 static void put_condition_code
PROTO ((enum rtx_code
, enum machine_mode
,
234 static enum rtx_code unsigned_comparison
PROTO ((enum rtx_code code
));
235 static rtx ix86_expand_int_compare
PROTO ((enum rtx_code
, rtx
, rtx
));
236 static rtx ix86_expand_fp_compare
PROTO ((enum rtx_code
, rtx
, rtx
, int));
237 static rtx ix86_expand_compare
PROTO ((enum rtx_code
, int));
238 static rtx gen_push
PROTO ((rtx
));
239 static int memory_address_length
PROTO ((rtx addr
));
240 static int ix86_flags_dependant
PROTO ((rtx
, rtx
, enum attr_type
));
241 static int ix86_agi_dependant
PROTO ((rtx
, rtx
, enum attr_type
));
242 static int ix86_safe_length
PROTO ((rtx
));
243 static enum attr_memory ix86_safe_memory
PROTO ((rtx
));
244 static enum attr_pent_pair ix86_safe_pent_pair
PROTO ((rtx
));
245 static enum attr_ppro_uops ix86_safe_ppro_uops
PROTO ((rtx
));
246 static void ix86_dump_ppro_packet
PROTO ((FILE *));
247 static void ix86_reorder_insn
PROTO ((rtx
*, rtx
*));
248 static rtx
* ix86_pent_find_pair
PROTO ((rtx
*, rtx
*, enum attr_pent_pair
,
250 static void ix86_init_machine_status
PROTO ((struct function
*));
251 static void ix86_mark_machine_status
PROTO ((struct function
*));
255 rtx base
, index
, disp
;
259 static int ix86_decompose_address
PARAMS ((rtx
, struct ix86_address
*));
261 /* Sometimes certain combinations of command options do not make
262 sense on a particular target machine. You can define a macro
263 `OVERRIDE_OPTIONS' to take account of this. This macro, if
264 defined, is executed once just after all the command options have
267 Don't use this macro to turn on various extra optimizations for
268 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
273 /* Comes from final.c -- no real reason to change it. */
274 #define MAX_CODE_ALIGN 16
278 struct processor_costs
*cost
; /* Processor costs */
279 int target_enable
; /* Target flags to enable. */
280 int target_disable
; /* Target flags to disable. */
281 int align_loop
; /* Default alignments. */
286 const processor_target_table
[PROCESSOR_max
] =
288 {&i386_cost
, 0, 0, 2, 2, 2, 1},
289 {&i486_cost
, 0, 0, 4, 4, 4, 1},
290 {&pentium_cost
, 0, 0, -4, -4, -4, 1},
291 {&pentiumpro_cost
, 0, 0, 4, -4, 4, 1},
292 {&k6_cost
, 0, 0, -5, -5, 4, 1}
297 char *name
; /* processor name or nickname. */
298 enum processor_type processor
;
300 const processor_alias_table
[] =
302 {"i386", PROCESSOR_I386
},
303 {"i486", PROCESSOR_I486
},
304 {"i586", PROCESSOR_PENTIUM
},
305 {"pentium", PROCESSOR_PENTIUM
},
306 {"i686", PROCESSOR_PENTIUMPRO
},
307 {"pentiumpro", PROCESSOR_PENTIUMPRO
},
308 {"k6", PROCESSOR_K6
},
311 int const pta_size
= sizeof(processor_alias_table
)/sizeof(struct pta
);
313 #ifdef SUBTARGET_OVERRIDE_OPTIONS
314 SUBTARGET_OVERRIDE_OPTIONS
;
317 ix86_arch
= PROCESSOR_I386
;
318 ix86_cpu
= (enum processor_type
) TARGET_CPU_DEFAULT
;
320 if (ix86_arch_string
!= 0)
323 for (i
= 0; i
< pta_size
; i
++)
324 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
326 ix86_arch
= processor_alias_table
[i
].processor
;
327 /* Default cpu tuning to the architecture. */
328 ix86_cpu
= ix86_arch
;
332 error ("bad value (%s) for -march= switch", ix86_arch_string
);
335 if (ix86_cpu_string
!= 0)
338 for (i
= 0; i
< pta_size
; i
++)
339 if (! strcmp (ix86_cpu_string
, processor_alias_table
[i
].name
))
341 ix86_cpu
= processor_alias_table
[i
].processor
;
345 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string
);
348 ix86_cost
= processor_target_table
[ix86_cpu
].cost
;
349 target_flags
|= processor_target_table
[ix86_cpu
].target_enable
;
350 target_flags
&= ~processor_target_table
[ix86_cpu
].target_disable
;
352 /* Arrange to set up i386_stack_locals for all functions. */
353 init_machine_status
= ix86_init_machine_status
;
354 mark_machine_status
= ix86_mark_machine_status
;
356 /* Validate registers in register allocation order. */
357 if (ix86_reg_alloc_order
)
360 for (i
= 0; (ch
= ix86_reg_alloc_order
[i
]) != '\0'; i
++)
366 case 'a': regno
= 0; break;
367 case 'd': regno
= 1; break;
368 case 'c': regno
= 2; break;
369 case 'b': regno
= 3; break;
370 case 'S': regno
= 4; break;
371 case 'D': regno
= 5; break;
372 case 'B': regno
= 6; break;
374 default: fatal ("Register '%c' is unknown", ch
);
377 if (regs_allocated
[regno
])
378 fatal ("Register '%c' already specified in allocation order", ch
);
380 regs_allocated
[regno
] = 1;
384 /* Validate -mregparm= value. */
385 if (ix86_regparm_string
)
387 ix86_regparm
= atoi (ix86_regparm_string
);
388 if (ix86_regparm
< 0 || ix86_regparm
> REGPARM_MAX
)
389 fatal ("-mregparm=%d is not between 0 and %d",
390 ix86_regparm
, REGPARM_MAX
);
393 /* Validate -malign-loops= value, or provide default. */
394 ix86_align_loops
= processor_target_table
[ix86_cpu
].align_loop
;
395 if (ix86_align_loops_string
)
397 ix86_align_loops
= atoi (ix86_align_loops_string
);
398 if (ix86_align_loops
< 0 || ix86_align_loops
> MAX_CODE_ALIGN
)
399 fatal ("-malign-loops=%d is not between 0 and %d",
400 ix86_align_loops
, MAX_CODE_ALIGN
);
403 /* Validate -malign-jumps= value, or provide default. */
404 ix86_align_jumps
= processor_target_table
[ix86_cpu
].align_jump
;
405 if (ix86_align_jumps_string
)
407 ix86_align_jumps
= atoi (ix86_align_jumps_string
);
408 if (ix86_align_jumps
< 0 || ix86_align_jumps
> MAX_CODE_ALIGN
)
409 fatal ("-malign-jumps=%d is not between 0 and %d",
410 ix86_align_jumps
, MAX_CODE_ALIGN
);
413 /* Validate -malign-functions= value, or provide default. */
414 ix86_align_funcs
= processor_target_table
[ix86_cpu
].align_func
;
415 if (ix86_align_funcs_string
)
417 ix86_align_funcs
= atoi (ix86_align_funcs_string
);
418 if (ix86_align_funcs
< 0 || ix86_align_funcs
> MAX_CODE_ALIGN
)
419 fatal ("-malign-functions=%d is not between 0 and %d",
420 ix86_align_funcs
, MAX_CODE_ALIGN
);
423 /* Validate -mpreferred_stack_boundary= value, or provide default.
424 The default of 128 bits is for Pentium III's SSE __m128. */
425 ix86_preferred_stack_boundary
= 128;
426 if (ix86_preferred_stack_boundary_string
)
428 int i
= atoi (ix86_preferred_stack_boundary_string
);
430 fatal ("-mpreferred_stack_boundary=%d is not between 2 and 31", i
);
431 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
434 /* Validate -mbranch-cost= value, or provide default. */
435 ix86_branch_cost
= processor_target_table
[ix86_cpu
].branch_cost
;
436 if (ix86_branch_cost_string
)
438 ix86_branch_cost
= atoi (ix86_branch_cost_string
);
439 if (ix86_branch_cost
< 0 || ix86_branch_cost
> 5)
440 fatal ("-mbranch-cost=%d is not between 0 and 5",
444 /* Keep nonleaf frame pointers. */
445 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
446 flag_omit_frame_pointer
= 1;
448 /* If we're doing fast math, we don't care about comparison order
449 wrt NaNs. This lets us use a shorter comparison sequence. */
451 target_flags
&= ~MASK_IEEE_FP
;
453 /* If we're planning on using `loop', use it. */
454 if (TARGET_USE_LOOP
&& optimize
)
455 flag_branch_on_count_reg
= 1;
458 /* A C statement (sans semicolon) to choose the order in which to
459 allocate hard registers for pseudo-registers local to a basic
462 Store the desired register order in the array `reg_alloc_order'.
463 Element 0 should be the register to allocate first; element 1, the
464 next register; and so on.
466 The macro body should not assume anything about the contents of
467 `reg_alloc_order' before execution of the macro.
469 On most machines, it is not necessary to define this macro. */
472 order_regs_for_local_alloc ()
476 /* User specified the register allocation order. */
478 if (ix86_reg_alloc_order
)
480 for (i
= order
= 0; (ch
= ix86_reg_alloc_order
[i
]) != '\0'; i
++)
486 case 'a': regno
= 0; break;
487 case 'd': regno
= 1; break;
488 case 'c': regno
= 2; break;
489 case 'b': regno
= 3; break;
490 case 'S': regno
= 4; break;
491 case 'D': regno
= 5; break;
492 case 'B': regno
= 6; break;
495 reg_alloc_order
[order
++] = regno
;
498 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
500 if (! regs_allocated
[i
])
501 reg_alloc_order
[order
++] = i
;
505 /* If user did not specify a register allocation order, use natural order. */
508 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
509 reg_alloc_order
[i
] = i
;
514 optimization_options (level
, size
)
516 int size ATTRIBUTE_UNUSED
;
518 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
519 make the problem with not enough registers even worse. */
520 #ifdef INSN_SCHEDULING
522 flag_schedule_insns
= 0;
526 /* Return nonzero if the rtx is known aligned. */
533 struct ix86_address parts
;
535 /* Registers and immediate operands are always "aligned". */
536 if (GET_CODE (op
) != MEM
)
539 /* Don't even try to do any aligned optimizations with volatiles. */
540 if (MEM_VOLATILE_P (op
))
545 /* Pushes and pops are only valid on the stack pointer. */
546 if (GET_CODE (op
) == PRE_DEC
547 || GET_CODE (op
) == POST_INC
)
550 /* Decode the address. */
551 if (! ix86_decompose_address (op
, &parts
))
554 /* Look for some component that isn't known to be aligned. */
558 && REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 4)
563 if (REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 4)
568 if (GET_CODE (parts
.disp
) != CONST_INT
569 || (INTVAL (parts
.disp
) & 3) != 0)
573 /* Didn't find one -- this must be an aligned address. */
577 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
578 attribute for DECL. The attributes in ATTRIBUTES have previously been
582 ix86_valid_decl_attribute_p (decl
, attributes
, identifier
, args
)
583 tree decl ATTRIBUTE_UNUSED
;
584 tree attributes ATTRIBUTE_UNUSED
;
585 tree identifier ATTRIBUTE_UNUSED
;
586 tree args ATTRIBUTE_UNUSED
;
591 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
592 attribute for TYPE. The attributes in ATTRIBUTES have previously been
596 ix86_valid_type_attribute_p (type
, attributes
, identifier
, args
)
598 tree attributes ATTRIBUTE_UNUSED
;
602 if (TREE_CODE (type
) != FUNCTION_TYPE
603 && TREE_CODE (type
) != METHOD_TYPE
604 && TREE_CODE (type
) != FIELD_DECL
605 && TREE_CODE (type
) != TYPE_DECL
)
608 /* Stdcall attribute says callee is responsible for popping arguments
609 if they are not variable. */
610 if (is_attribute_p ("stdcall", identifier
))
611 return (args
== NULL_TREE
);
613 /* Cdecl attribute says the callee is a normal C declaration. */
614 if (is_attribute_p ("cdecl", identifier
))
615 return (args
== NULL_TREE
);
617 /* Regparm attribute specifies how many integer arguments are to be
618 passed in registers. */
619 if (is_attribute_p ("regparm", identifier
))
623 if (! args
|| TREE_CODE (args
) != TREE_LIST
624 || TREE_CHAIN (args
) != NULL_TREE
625 || TREE_VALUE (args
) == NULL_TREE
)
628 cst
= TREE_VALUE (args
);
629 if (TREE_CODE (cst
) != INTEGER_CST
)
632 if (TREE_INT_CST_HIGH (cst
) != 0
633 || TREE_INT_CST_LOW (cst
) < 0
634 || TREE_INT_CST_LOW (cst
) > REGPARM_MAX
)
643 /* Return 0 if the attributes for two types are incompatible, 1 if they
644 are compatible, and 2 if they are nearly compatible (which causes a
645 warning to be generated). */
648 ix86_comp_type_attributes (type1
, type2
)
652 /* Check for mismatch of non-default calling convention. */
653 char *rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
655 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
658 /* Check for mismatched return types (cdecl vs stdcall). */
659 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
660 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
665 /* Value is the number of bytes of arguments automatically
666 popped when returning from a subroutine call.
667 FUNDECL is the declaration node of the function (as a tree),
668 FUNTYPE is the data type of the function (as a tree),
669 or for a library call it is an identifier node for the subroutine name.
670 SIZE is the number of bytes of arguments passed on the stack.
672 On the 80386, the RTD insn may be used to pop them if the number
673 of args is fixed, but if the number is variable then the caller
674 must pop them all. RTD can't be used for library calls now
675 because the library is compiled with the Unix compiler.
676 Use of RTD is a selectable option, since it is incompatible with
677 standard Unix calling sequences. If the option is not selected,
678 the caller must always pop the args.
680 The attribute stdcall is equivalent to RTD on a per module basis. */
683 ix86_return_pops_args (fundecl
, funtype
, size
)
688 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
690 /* Cdecl functions override -mrtd, and never pop the stack. */
691 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
693 /* Stdcall functions will pop the stack if not variable args. */
694 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
)))
698 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
699 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
704 /* Lose any fake structure return argument. */
705 if (aggregate_value_p (TREE_TYPE (funtype
)))
706 return GET_MODE_SIZE (Pmode
);
711 /* Argument support functions. */
713 /* Initialize a variable CUM of type CUMULATIVE_ARGS
714 for a call to a function whose data type is FNTYPE.
715 For a library call, FNTYPE is 0. */
718 init_cumulative_args (cum
, fntype
, libname
)
719 CUMULATIVE_ARGS
*cum
; /* Argument info to initialize */
720 tree fntype
; /* tree ptr for function decl */
721 rtx libname
; /* SYMBOL_REF of library name or 0 */
723 static CUMULATIVE_ARGS zero_cum
;
724 tree param
, next_param
;
726 if (TARGET_DEBUG_ARG
)
728 fprintf (stderr
, "\ninit_cumulative_args (");
730 fprintf (stderr
, "fntype code = %s, ret code = %s",
731 tree_code_name
[(int) TREE_CODE (fntype
)],
732 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
734 fprintf (stderr
, "no fntype");
737 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
742 /* Set up the number of registers to use for passing arguments. */
743 cum
->nregs
= ix86_regparm
;
746 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype
));
749 cum
->nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
752 /* Determine if this function has variable arguments. This is
753 indicated by the last argument being 'void_type_mode' if there
754 are no variable arguments. If there are variable arguments, then
755 we won't pass anything in registers */
759 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
760 param
!= 0; param
= next_param
)
762 next_param
= TREE_CHAIN (param
);
763 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
768 if (TARGET_DEBUG_ARG
)
769 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
774 /* Update the data in CUM to advance over an argument
775 of mode MODE and data type TYPE.
776 (TYPE is null for libcalls where that information may not be available.) */
779 function_arg_advance (cum
, mode
, type
, named
)
780 CUMULATIVE_ARGS
*cum
; /* current arg information */
781 enum machine_mode mode
; /* current arg mode */
782 tree type
; /* type of the argument or 0 if lib support */
783 int named
; /* whether or not the argument was named */
786 = (mode
== BLKmode
) ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
);
787 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
789 if (TARGET_DEBUG_ARG
)
791 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
792 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
807 /* Define where to put the arguments to a function.
808 Value is zero to push the argument on the stack,
809 or a hard register in which to store the argument.
811 MODE is the argument's machine mode.
812 TYPE is the data type of the argument (as a tree).
813 This is null for libcalls where that information may
815 CUM is a variable of type CUMULATIVE_ARGS which gives info about
816 the preceding args and about the function being called.
817 NAMED is nonzero if this argument is a named parameter
818 (otherwise it is an extra parameter matching an ellipsis). */
821 function_arg (cum
, mode
, type
, named
)
822 CUMULATIVE_ARGS
*cum
; /* current arg information */
823 enum machine_mode mode
; /* current arg mode */
824 tree type
; /* type of the argument or 0 if lib support */
825 int named
; /* != 0 for normal args, == 0 for ... args */
829 = (mode
== BLKmode
) ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
);
830 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
834 /* For now, pass fp/complex values on the stack. */
843 if (words
<= cum
->nregs
)
844 ret
= gen_rtx_REG (mode
, cum
->regno
);
848 if (TARGET_DEBUG_ARG
)
851 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
852 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
855 fprintf (stderr
, ", reg=%%e%s", reg_names
[ REGNO(ret
) ]);
857 fprintf (stderr
, ", stack");
859 fprintf (stderr
, " )\n");
865 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
866 reference and a constant. */
869 symbolic_operand (op
, mode
)
871 enum machine_mode mode ATTRIBUTE_UNUSED
;
873 switch (GET_CODE (op
))
881 if (GET_CODE (op
) == SYMBOL_REF
882 || GET_CODE (op
) == LABEL_REF
883 || (GET_CODE (op
) == UNSPEC
885 && XINT (op
, 1) <= 7))
887 if (GET_CODE (op
) != PLUS
888 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
892 if (GET_CODE (op
) == SYMBOL_REF
893 || GET_CODE (op
) == LABEL_REF
)
895 /* Only @GOTOFF gets offsets. */
896 if (GET_CODE (op
) != UNSPEC
897 || XINT (op
, 1) != 7)
900 op
= XVECEXP (op
, 0, 0);
901 if (GET_CODE (op
) == SYMBOL_REF
902 || GET_CODE (op
) == LABEL_REF
)
911 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
914 pic_symbolic_operand (op
, mode
)
916 enum machine_mode mode ATTRIBUTE_UNUSED
;
918 if (GET_CODE (op
) == CONST
)
921 if (GET_CODE (op
) == UNSPEC
)
923 if (GET_CODE (op
) != PLUS
924 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
927 if (GET_CODE (op
) == UNSPEC
)
933 /* Test for a valid operand for a call instruction. Don't allow the
934 arg pointer register or virtual regs since they may decay into
935 reg + const, which the patterns can't handle. */
938 call_insn_operand (op
, mode
)
940 enum machine_mode mode ATTRIBUTE_UNUSED
;
942 if (GET_CODE (op
) != MEM
)
946 /* Disallow indirect through a virtual register. This leads to
947 compiler aborts when trying to eliminate them. */
948 if (GET_CODE (op
) == REG
949 && (op
== arg_pointer_rtx
950 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
951 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
954 /* Disallow `call 1234'. Due to varying assembler lameness this
955 gets either rejected or translated to `call .+1234'. */
956 if (GET_CODE (op
) == CONST_INT
)
959 /* Otherwise we can allow any general_operand in the address. */
960 return general_operand (op
, Pmode
);
963 /* Like call_insn_operand but allow (mem (symbol_ref ...)) even if pic. */
966 expander_call_insn_operand (op
, mode
)
968 enum machine_mode mode
;
970 if (GET_CODE (op
) == MEM
971 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
)
974 return call_insn_operand (op
, mode
);
978 constant_call_address_operand (op
, mode
)
980 enum machine_mode mode ATTRIBUTE_UNUSED
;
982 return GET_CODE (op
) == MEM
&& CONSTANT_ADDRESS_P (XEXP (op
, 0));
985 /* Match exactly zero and one. */
988 const0_operand (op
, mode
)
990 enum machine_mode mode
;
992 return op
== CONST0_RTX (mode
);
996 const1_operand (op
, mode
)
998 enum machine_mode mode ATTRIBUTE_UNUSED
;
1000 return op
== const1_rtx
;
1003 /* Match 2, 4, or 8. Used for leal multiplicands. */
1006 const248_operand (op
, mode
)
1008 enum machine_mode mode ATTRIBUTE_UNUSED
;
1010 return (GET_CODE (op
) == CONST_INT
1011 && (INTVAL (op
) == 2 || INTVAL (op
) == 4 || INTVAL (op
) == 8));
1014 /* True if this is a constant appropriate for an increment or decrement. */
1017 incdec_operand (op
, mode
)
1019 enum machine_mode mode
;
1021 if (op
== const1_rtx
|| op
== constm1_rtx
)
1023 if (GET_CODE (op
) != CONST_INT
)
1025 if (mode
== SImode
&& INTVAL (op
) == (HOST_WIDE_INT
) 0xffffffff)
1027 if (mode
== HImode
&& INTVAL (op
) == (HOST_WIDE_INT
) 0xffff)
1029 if (mode
== QImode
&& INTVAL (op
) == (HOST_WIDE_INT
) 0xff)
1034 /* Return false if this is the stack pointer, or any other fake
1035 register eliminable to the stack pointer. Otherwise, this is
1038 This is used to prevent esp from being used as an index reg.
1039 Which would only happen in pathological cases. */
1042 reg_no_sp_operand (op
, mode
)
1044 enum machine_mode mode
;
1047 if (GET_CODE (t
) == SUBREG
)
1049 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
)
1052 return register_operand (op
, mode
);
1055 /* Return true if op is a Q_REGS class register. */
1058 q_regs_operand (op
, mode
)
1060 enum machine_mode mode
;
1062 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
1064 if (GET_CODE (op
) == SUBREG
)
1065 op
= SUBREG_REG (op
);
1066 return QI_REG_P (op
);
1069 /* Return true if op is a NON_Q_REGS class register. */
1072 non_q_regs_operand (op
, mode
)
1074 enum machine_mode mode
;
1076 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
1078 if (GET_CODE (op
) == SUBREG
)
1079 op
= SUBREG_REG (op
);
1080 return NON_QI_REG_P (op
);
1083 /* Return 1 if OP is a comparison operator that can use the condition code
1084 generated by a logical operation, which characteristically does not set
1085 overflow or carry. To be used with CCNOmode. */
1088 no_comparison_operator (op
, mode
)
1090 enum machine_mode mode
;
1092 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
1093 && GET_RTX_CLASS (GET_CODE (op
)) == '<'
1094 && GET_CODE (op
) != LE
1095 && GET_CODE (op
) != GT
);
1098 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
1101 fcmov_comparison_operator (op
, mode
)
1103 enum machine_mode mode
;
1105 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
1106 && GET_RTX_CLASS (GET_CODE (op
)) == '<'
1107 && GET_CODE (op
) == unsigned_condition (GET_CODE (op
)));
1110 /* Nearly general operand, but accept any const_double, since we wish
1111 to be able to drop them into memory rather than have them get pulled
1115 cmp_fp_expander_operand (op
, mode
)
1117 enum machine_mode mode
;
1119 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
1121 if (GET_CODE (op
) == CONST_DOUBLE
)
1123 return general_operand (op
, mode
);
1126 /* Match an SI or HImode register for a zero_extract. */
1129 ext_register_operand (op
, mode
)
1131 enum machine_mode mode ATTRIBUTE_UNUSED
;
1133 if (GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
1135 return register_operand (op
, VOIDmode
);
1138 /* Return 1 if this is a valid binary floating-point operation.
1139 OP is the expression matched, and MODE is its mode. */
1142 binary_fp_operator (op
, mode
)
1144 enum machine_mode mode
;
1146 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
1149 switch (GET_CODE (op
))
1155 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
1163 mult_operator(op
, mode
)
1165 enum machine_mode mode ATTRIBUTE_UNUSED
;
1167 return GET_CODE (op
) == MULT
;
1171 div_operator(op
, mode
)
1173 enum machine_mode mode ATTRIBUTE_UNUSED
;
1175 return GET_CODE (op
) == DIV
;
1179 arith_or_logical_operator (op
, mode
)
1181 enum machine_mode mode
;
1183 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
1184 && (GET_RTX_CLASS (GET_CODE (op
)) == 'c'
1185 || GET_RTX_CLASS (GET_CODE (op
)) == '2'));
1188 /* Returns 1 if OP is memory operand with a displacement. */
1191 memory_displacement_operand (op
, mode
)
1193 enum machine_mode mode
;
1195 struct ix86_address parts
;
1197 if (! memory_operand (op
, mode
))
1200 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
1203 return parts
.disp
!= NULL_RTX
;
1206 /* To avoid problems when jump re-emits comparisons like testqi_ext_0,
1207 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1209 ??? It seems likely that this will only work because cmpsi is an
1210 expander, and no actual insns use this. */
1213 cmpsi_operand (op
, mode
)
1215 enum machine_mode mode
;
1217 if (general_operand (op
, mode
))
1220 if (GET_CODE (op
) == AND
1221 && GET_MODE (op
) == SImode
1222 && GET_CODE (XEXP (op
, 0)) == ZERO_EXTRACT
1223 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
1224 && GET_CODE (XEXP (XEXP (op
, 0), 2)) == CONST_INT
1225 && INTVAL (XEXP (XEXP (op
, 0), 1)) == 8
1226 && INTVAL (XEXP (XEXP (op
, 0), 2)) == 8
1227 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
1233 /* Returns 1 if OP is memory operand that can not be represented by the
1237 long_memory_operand (op
, mode
)
1239 enum machine_mode mode
;
1241 if (! memory_operand (op
, mode
))
1244 return memory_address_length (op
) != 0;
1247 /* Return true if the constant is something that can be loaded with
1248 a special instruction. Only handle 0.0 and 1.0; others are less
1252 standard_80387_constant_p (x
)
1255 if (GET_CODE (x
) != CONST_DOUBLE
)
1258 #if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
1264 if (setjmp (handler
))
1267 set_float_handler (handler
);
1268 REAL_VALUE_FROM_CONST_DOUBLE (d
, x
);
1269 is0
= REAL_VALUES_EQUAL (d
, dconst0
) && !REAL_VALUE_MINUS_ZERO (d
);
1270 is1
= REAL_VALUES_EQUAL (d
, dconst1
);
1271 set_float_handler (NULL_PTR
);
1279 /* Note that on the 80387, other constants, such as pi,
1280 are much slower to load as standard constants
1281 than to load from doubles in memory! */
1282 /* ??? Not true on K6: all constants are equal cost. */
1289 /* Returns 1 if OP contains a symbol reference */
1292 symbolic_reference_mentioned_p (op
)
1295 register const char *fmt
;
1298 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
1301 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
1302 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
1308 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
1309 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
1313 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
1320 /* Return 1 if it is appropriate to emit `ret' instructions in the
1321 body of a function. Do this only if the epilogue is simple, needing a
1322 couple of insns. Prior to reloading, we can't tell how many registers
1323 must be saved, so return 0 then. Return 0 if there is no frame
1324 marker to de-allocate.
1326 If NON_SAVING_SETJMP is defined and true, then it is not possible
1327 for the epilogue to be simple, so return 0. This is a special case
1328 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1329 until final, but jump_optimize may need to know sooner if a
1333 ix86_can_use_return_insn_p ()
1337 int reglimit
= (frame_pointer_needed
1338 ? FRAME_POINTER_REGNUM
: STACK_POINTER_REGNUM
);
1339 int pic_reg_used
= flag_pic
&& (current_function_uses_pic_offset_table
1340 || current_function_uses_const_pool
);
1342 #ifdef NON_SAVING_SETJMP
1343 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
1347 if (! reload_completed
)
1350 for (regno
= reglimit
- 1; regno
>= 0; regno
--)
1351 if ((regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1352 || (regno
== PIC_OFFSET_TABLE_REGNUM
&& pic_reg_used
))
1355 return nregs
== 0 || ! frame_pointer_needed
;
1358 static char *pic_label_name
;
1359 static int pic_label_output
;
1360 static char *global_offset_table_name
;
1362 /* This function generates code for -fpic that loads %ebx with
1363 the return address of the caller and then returns. */
1366 asm_output_function_prefix (file
, name
)
1368 char *name ATTRIBUTE_UNUSED
;
1371 int pic_reg_used
= flag_pic
&& (current_function_uses_pic_offset_table
1372 || current_function_uses_const_pool
);
1373 xops
[0] = pic_offset_table_rtx
;
1374 xops
[1] = stack_pointer_rtx
;
1376 /* Deep branch prediction favors having a return for every call. */
1377 if (pic_reg_used
&& TARGET_DEEP_BRANCH_PREDICTION
)
1379 if (!pic_label_output
)
1381 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
1382 internal (non-global) label that's being emitted, it didn't make
1383 sense to have .type information for local labels. This caused
1384 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
1385 me debug info for a label that you're declaring non-global?) this
1386 was changed to call ASM_OUTPUT_LABEL() instead. */
1388 ASM_OUTPUT_LABEL (file
, pic_label_name
);
1390 xops
[1] = gen_rtx_MEM (SImode
, xops
[1]);
1391 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
1392 output_asm_insn ("ret", xops
);
1394 pic_label_output
= 1;
1400 load_pic_register ()
1404 if (global_offset_table_name
== NULL
)
1406 global_offset_table_name
=
1407 ggc_alloc_string ("_GLOBAL_OFFSET_TABLE_", 21);
1408 ggc_add_string_root (&global_offset_table_name
, 1);
1410 gotsym
= gen_rtx_SYMBOL_REF (Pmode
, global_offset_table_name
);
1412 if (TARGET_DEEP_BRANCH_PREDICTION
)
1414 if (pic_label_name
== NULL
)
1416 pic_label_name
= ggc_alloc_string (NULL
, 32);
1417 ggc_add_string_root (&pic_label_name
, 1);
1418 ASM_GENERATE_INTERNAL_LABEL (pic_label_name
, "LPR", 0);
1420 pclab
= gen_rtx_MEM (QImode
, gen_rtx_SYMBOL_REF (Pmode
, pic_label_name
));
1424 pclab
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
1427 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx
, pclab
));
1429 if (! TARGET_DEEP_BRANCH_PREDICTION
)
1430 emit_insn (gen_popsi1 (pic_offset_table_rtx
));
1432 emit_insn (gen_prologue_set_got (pic_offset_table_rtx
, gotsym
, pclab
));
1435 /* Generate an SImode "push" pattern for input ARG. */
1441 return gen_rtx_SET (VOIDmode
,
1442 gen_rtx_MEM (SImode
,
1443 gen_rtx_PRE_DEC (SImode
,
1444 stack_pointer_rtx
)),
1448 /* Compute the size of local storage taking into consideration the
1449 desired stack alignment which is to be maintained. Also determine
1450 the number of registers saved below the local storage. */
1453 ix86_compute_frame_size (size
, nregs_on_stack
)
1455 int *nregs_on_stack
;
1461 int pic_reg_used
= flag_pic
&& (current_function_uses_pic_offset_table
1462 || current_function_uses_const_pool
);
1463 HOST_WIDE_INT total_size
;
1465 limit
= frame_pointer_needed
1466 ? FRAME_POINTER_REGNUM
: STACK_POINTER_REGNUM
;
1470 for (regno
= limit
- 1; regno
>= 0; regno
--)
1471 if ((regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1472 || (regno
== PIC_OFFSET_TABLE_REGNUM
&& pic_reg_used
))
1476 total_size
= size
+ (nregs
* UNITS_PER_WORD
);
1478 #ifdef PREFERRED_STACK_BOUNDARY
1481 int preferred_alignment
= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
;
1484 if (frame_pointer_needed
)
1485 offset
+= UNITS_PER_WORD
;
1487 total_size
+= offset
;
1489 padding
= ((total_size
+ preferred_alignment
- 1)
1490 & -preferred_alignment
) - total_size
;
1492 if (padding
< (((offset
+ preferred_alignment
- 1)
1493 & -preferred_alignment
) - offset
))
1494 padding
+= preferred_alignment
;
1496 /* Don't bother aligning the stack of a leaf function
1497 which doesn't allocate any stack slots. */
1498 if (size
== 0 && current_function_is_leaf
)
1504 *nregs_on_stack
= nregs
;
1506 return size
+ padding
;
1509 /* Expand the prologue into a bunch of separate insns. */
1512 ix86_expand_prologue ()
1516 int pic_reg_used
= flag_pic
&& (current_function_uses_pic_offset_table
1517 || current_function_uses_const_pool
);
1518 HOST_WIDE_INT tsize
= ix86_compute_frame_size (get_frame_size (), (int *)0);
1521 /* Note: AT&T enter does NOT have reversed args. Enter is probably
1522 slower on all targets. Also sdb doesn't like it. */
1524 if (frame_pointer_needed
)
1526 insn
= emit_insn (gen_push (frame_pointer_rtx
));
1527 RTX_FRAME_RELATED_P (insn
) = 1;
1529 insn
= emit_move_insn (frame_pointer_rtx
, stack_pointer_rtx
);
1530 RTX_FRAME_RELATED_P (insn
) = 1;
1535 else if (! TARGET_STACK_PROBE
|| tsize
< CHECK_STACK_LIMIT
)
1537 if (frame_pointer_needed
)
1538 insn
= emit_insn (gen_prologue_allocate_stack (stack_pointer_rtx
,
1541 frame_pointer_rtx
));
1543 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
1545 RTX_FRAME_RELATED_P (insn
) = 1;
1549 /* ??? Is this only valid for Win32? */
1553 arg0
= gen_rtx_REG (SImode
, 0);
1554 emit_move_insn (arg0
, GEN_INT (tsize
));
1556 sym
= gen_rtx_MEM (FUNCTION_MODE
,
1557 gen_rtx_SYMBOL_REF (Pmode
, "_alloca"));
1558 insn
= emit_call_insn (gen_call (sym
, const0_rtx
));
1560 CALL_INSN_FUNCTION_USAGE (insn
)
1561 = gen_rtx_EXPR_LIST (VOIDmode
, gen_rtx_USE (VOIDmode
, arg0
),
1562 CALL_INSN_FUNCTION_USAGE (insn
));
1565 limit
= (frame_pointer_needed
? FRAME_POINTER_REGNUM
: STACK_POINTER_REGNUM
);
1566 for (regno
= limit
- 1; regno
>= 0; regno
--)
1567 if ((regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1568 || (regno
== PIC_OFFSET_TABLE_REGNUM
&& pic_reg_used
))
1570 insn
= emit_insn (gen_push (gen_rtx_REG (SImode
, regno
)));
1571 RTX_FRAME_RELATED_P (insn
) = 1;
1574 #ifdef SUBTARGET_PROLOGUE
1579 load_pic_register ();
1581 /* If we are profiling, make sure no instructions are scheduled before
1582 the call to mcount. However, if -fpic, the above call will have
1584 if ((profile_flag
|| profile_block_flag
) && ! pic_reg_used
)
1585 emit_insn (gen_blockage ());
1588 /* Restore function stack, frame, and registers. */
1591 ix86_expand_epilogue ()
1596 int pic_reg_used
= flag_pic
&& (current_function_uses_pic_offset_table
1597 || current_function_uses_const_pool
);
1598 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
1599 HOST_WIDE_INT offset
;
1600 HOST_WIDE_INT tsize
= ix86_compute_frame_size (get_frame_size (), &nregs
);
1602 /* SP is often unreliable so we may have to go off the frame pointer. */
1604 offset
= -(tsize
+ nregs
* UNITS_PER_WORD
);
1606 /* If we're only restoring one register and sp is not valid then
1607 using a move instruction to restore the register since it's
1608 less work than reloading sp and popping the register. Otherwise,
1609 restore sp (if necessary) and pop the registers. */
1611 limit
= (frame_pointer_needed
1612 ? FRAME_POINTER_REGNUM
: STACK_POINTER_REGNUM
);
1614 if (nregs
> 1 || sp_valid
)
1619 addr_offset
= adj_offsettable_operand (AT_BP (QImode
), offset
);
1620 addr_offset
= XEXP (addr_offset
, 0);
1622 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, addr_offset
));
1625 for (regno
= 0; regno
< limit
; regno
++)
1626 if ((regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1627 || (regno
== PIC_OFFSET_TABLE_REGNUM
&& pic_reg_used
))
1629 emit_insn (gen_popsi1 (gen_rtx_REG (SImode
, regno
)));
1634 for (regno
= 0; regno
< limit
; regno
++)
1635 if ((regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1636 || (regno
== PIC_OFFSET_TABLE_REGNUM
&& pic_reg_used
))
1638 emit_move_insn (gen_rtx_REG (SImode
, regno
),
1639 adj_offsettable_operand (AT_BP (Pmode
), offset
));
1644 if (frame_pointer_needed
)
1646 /* If not an i386, mov & pop is faster than "leave". */
1647 if (TARGET_USE_LEAVE
)
1648 emit_insn (gen_leave());
1651 emit_insn (gen_epilogue_deallocate_stack (stack_pointer_rtx
,
1652 frame_pointer_rtx
));
1653 emit_insn (gen_popsi1 (frame_pointer_rtx
));
1658 /* Intel's docs say that for 4 or 8 bytes of stack frame one should
1659 use `pop' and not `add'. */
1660 int use_pop
= tsize
== 4;
1663 /* Use two pops only for the Pentium processors. */
1664 if (tsize
== 8 && !TARGET_386
&& !TARGET_486
)
1666 rtx retval
= current_function_return_rtx
;
1668 edx
= gen_rtx_REG (SImode
, 1);
1670 /* This case is a bit more complex. Since we cannot pop into
1671 %ecx twice we need a second register. But this is only
1672 available if the return value is not of DImode in which
1673 case the %edx register is not available. */
1674 use_pop
= (retval
== NULL
1675 || ! reg_overlap_mentioned_p (edx
, retval
));
1680 ecx
= gen_rtx_REG (SImode
, 2);
1682 /* We have to prevent the two pops here from being scheduled.
1683 GCC otherwise would try in some situation to put other
1684 instructions in between them which has a bad effect. */
1685 emit_insn (gen_blockage ());
1686 emit_insn (gen_popsi1 (ecx
));
1688 emit_insn (gen_popsi1 (edx
));
1692 /* If there is no frame pointer, we must still release the frame. */
1693 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
1698 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
1699 if (profile_block_flag
== 2)
1701 FUNCTION_BLOCK_PROFILER_EXIT
;
1705 if (current_function_pops_args
&& current_function_args_size
)
1707 rtx popc
= GEN_INT (current_function_pops_args
);
1709 /* i386 can only pop 32K bytes (maybe 64K? Is it signed?). If
1710 asked to pop more, pop return address, do explicit add, and jump
1711 indirectly to the caller. */
1713 if (current_function_pops_args
>= 32768)
1715 rtx ecx
= gen_rtx_REG (SImode
, 2);
1717 emit_insn (gen_popsi1 (ecx
));
1718 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
1719 emit_indirect_jump (ecx
);
1722 emit_jump_insn (gen_return_pop_internal (popc
));
1725 emit_jump_insn (gen_return_internal ());
1728 /* Extract the parts of an RTL expression that is a valid memory address
1729 for an instruction. Return false if the structure of the address is
1733 ix86_decompose_address (addr
, out
)
1735 struct ix86_address
*out
;
1737 rtx base
= NULL_RTX
;
1738 rtx index
= NULL_RTX
;
1739 rtx disp
= NULL_RTX
;
1740 HOST_WIDE_INT scale
= 1;
1741 rtx scale_rtx
= NULL_RTX
;
1743 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == SUBREG
)
1745 else if (GET_CODE (addr
) == PLUS
)
1747 rtx op0
= XEXP (addr
, 0);
1748 rtx op1
= XEXP (addr
, 1);
1749 enum rtx_code code0
= GET_CODE (op0
);
1750 enum rtx_code code1
= GET_CODE (op1
);
1752 if (code0
== REG
|| code0
== SUBREG
)
1754 if (code1
== REG
|| code1
== SUBREG
)
1755 index
= op0
, base
= op1
; /* index + base */
1757 base
= op0
, disp
= op1
; /* base + displacement */
1759 else if (code0
== MULT
)
1761 index
= XEXP (op0
, 0);
1762 scale_rtx
= XEXP (op0
, 1);
1763 if (code1
== REG
|| code1
== SUBREG
)
1764 base
= op1
; /* index*scale + base */
1766 disp
= op1
; /* index*scale + disp */
1768 else if (code0
== PLUS
&& GET_CODE (XEXP (op0
, 0)) == MULT
)
1770 index
= XEXP (XEXP (op0
, 0), 0); /* index*scale + base + disp */
1771 scale_rtx
= XEXP (XEXP (op0
, 0), 1);
1772 base
= XEXP (op0
, 1);
1775 else if (code0
== PLUS
)
1777 index
= XEXP (op0
, 0); /* index + base + disp */
1778 base
= XEXP (op0
, 1);
1784 else if (GET_CODE (addr
) == MULT
)
1786 index
= XEXP (addr
, 0); /* index*scale */
1787 scale_rtx
= XEXP (addr
, 1);
1789 else if (GET_CODE (addr
) == ASHIFT
)
1793 /* We're called for lea too, which implements ashift on occasion. */
1794 index
= XEXP (addr
, 0);
1795 tmp
= XEXP (addr
, 1);
1796 if (GET_CODE (tmp
) != CONST_INT
)
1798 scale
= INTVAL (tmp
);
1799 if ((unsigned HOST_WIDE_INT
) scale
> 3)
1804 disp
= addr
; /* displacement */
1806 /* Extract the integral value of scale. */
1809 if (GET_CODE (scale_rtx
) != CONST_INT
)
1811 scale
= INTVAL (scale_rtx
);
1814 /* Allow arg pointer and stack pointer as index if there is not scaling */
1815 if (base
&& index
&& scale
== 1
1816 && (index
== arg_pointer_rtx
|| index
== stack_pointer_rtx
))
1823 /* Special case: %ebp cannot be encoded as a base without a displacement. */
1824 if (base
== frame_pointer_rtx
&& !disp
)
1827 /* Special case: on K6, [%esi] makes the instruction vector decoded.
1828 Avoid this by transforming to [%esi+0]. */
1829 if (ix86_cpu
== PROCESSOR_K6
&& !optimize_size
1830 && base
&& !index
&& !disp
1831 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
1834 /* Special case: encode reg+reg instead of reg*2. */
1835 if (!base
&& index
&& scale
&& scale
== 2)
1836 base
= index
, scale
= 1;
1838 /* Special case: scaling cannot be encoded without base or displacement. */
1839 if (!base
&& !disp
&& index
&& scale
!= 1)
1850 /* Determine if a given CONST RTX is a valid memory displacement
1854 legitimate_pic_address_disp_p (disp
)
1857 if (GET_CODE (disp
) != CONST
)
1859 disp
= XEXP (disp
, 0);
1861 if (GET_CODE (disp
) == PLUS
)
1863 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
1865 disp
= XEXP (disp
, 0);
1868 if (GET_CODE (disp
) != UNSPEC
1869 || XVECLEN (disp
, 0) != 1)
1872 /* Must be @GOT or @GOTOFF. */
1873 if (XINT (disp
, 1) != 6
1874 && XINT (disp
, 1) != 7)
1877 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
1878 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
1884 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
1885 memory address for an instruction. The MODE argument is the machine mode
1886 for the MEM expression that wants to use this address.
1888 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
1889 convert common non-canonical forms to canonical form so that they will
1893 legitimate_address_p (mode
, addr
, strict
)
1894 enum machine_mode mode
;
1898 struct ix86_address parts
;
1899 rtx base
, index
, disp
;
1900 HOST_WIDE_INT scale
;
1901 const char *reason
= NULL
;
1902 rtx reason_rtx
= NULL_RTX
;
1904 if (TARGET_DEBUG_ADDR
)
1907 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
1908 GET_MODE_NAME (mode
), strict
);
1912 if (! ix86_decompose_address (addr
, &parts
))
1914 reason
= "decomposition failed";
1919 index
= parts
.index
;
1921 scale
= parts
.scale
;
1923 /* Validate base register.
1925 Don't allow SUBREG's here, it can lead to spill failures when the base
1926 is one word out of a two word structure, which is represented internally
1933 if (GET_CODE (base
) != REG
)
1935 reason
= "base is not a register";
1939 if (GET_MODE (base
) != Pmode
)
1941 reason
= "base is not in Pmode";
1945 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (base
))
1946 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (base
)))
1948 reason
= "base is not valid";
1953 /* Validate index register.
1955 Don't allow SUBREG's here, it can lead to spill failures when the index
1956 is one word out of a two word structure, which is represented internally
1963 if (GET_CODE (index
) != REG
)
1965 reason
= "index is not a register";
1969 if (GET_MODE (index
) != Pmode
)
1971 reason
= "index is not in Pmode";
1975 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (index
))
1976 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (index
)))
1978 reason
= "index is not valid";
1983 /* Validate scale factor. */
1986 reason_rtx
= GEN_INT (scale
);
1989 reason
= "scale without index";
1993 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
1995 reason
= "scale is not a valid multiplier";
2000 /* Validate displacement. */
2005 if (!CONSTANT_ADDRESS_P (disp
))
2007 reason
= "displacement is not constant";
2011 if (GET_CODE (disp
) == CONST_DOUBLE
)
2013 reason
= "displacement is a const_double";
2017 if (flag_pic
&& SYMBOLIC_CONST (disp
))
2019 if (! legitimate_pic_address_disp_p (disp
))
2021 reason
= "displacement is an invalid pic construct";
2025 /* Verify that a symbolic pic displacement includes
2026 the pic_offset_table_rtx register. */
2027 if (base
!= pic_offset_table_rtx
2028 && (index
!= pic_offset_table_rtx
|| scale
!= 1))
2030 reason
= "pic displacement against invalid base";
2034 else if (HALF_PIC_P ())
2036 if (! HALF_PIC_ADDRESS_P (disp
)
2037 || (base
!= NULL_RTX
|| index
!= NULL_RTX
))
2039 reason
= "displacement is an invalid half-pic reference";
2045 /* Everything looks valid. */
2046 if (TARGET_DEBUG_ADDR
)
2047 fprintf (stderr
, "Success.\n");
2051 if (TARGET_DEBUG_ADDR
)
2053 fprintf (stderr
, "Error: %s\n", reason
);
2054 debug_rtx (reason_rtx
);
2059 /* Return a legitimate reference for ORIG (an address) using the
2060 register REG. If REG is 0, a new pseudo is generated.
2062 There are two types of references that must be handled:
2064 1. Global data references must load the address from the GOT, via
2065 the PIC reg. An insn is emitted to do this load, and the reg is
2068 2. Static data references, constant pool addresses, and code labels
2069 compute the address as an offset from the GOT, whose base is in
2070 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2071 differentiate them from global data objects. The returned
2072 address is the PIC reg + an unspec constant.
2074 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
2075 reg also appears in the address. */
2078 legitimize_pic_address (orig
, reg
)
2086 if (GET_CODE (addr
) == LABEL_REF
2087 || (GET_CODE (addr
) == SYMBOL_REF
2088 && (CONSTANT_POOL_ADDRESS_P (addr
)
2089 || SYMBOL_REF_FLAG (addr
))))
2091 /* This symbol may be referenced via a displacement from the PIC
2092 base address (@GOTOFF). */
2094 current_function_uses_pic_offset_table
= 1;
2095 new = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, addr
), 7);
2096 new = gen_rtx_CONST (VOIDmode
, new);
2097 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
2101 emit_move_insn (reg
, new);
2105 else if (GET_CODE (addr
) == SYMBOL_REF
)
2107 /* This symbol must be referenced via a load from the
2108 Global Offset Table (@GOT). */
2110 current_function_uses_pic_offset_table
= 1;
2111 new = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, addr
), 6);
2112 new = gen_rtx_CONST (VOIDmode
, new);
2113 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
2114 new = gen_rtx_MEM (Pmode
, new);
2115 RTX_UNCHANGING_P (new) = 1;
2118 reg
= gen_reg_rtx (Pmode
);
2119 emit_move_insn (reg
, new);
2124 if (GET_CODE (addr
) == CONST
)
2126 addr
= XEXP (addr
, 0);
2127 if (GET_CODE (addr
) == UNSPEC
)
2129 /* Check that the unspec is one of the ones we generate? */
2131 else if (GET_CODE (addr
) != PLUS
)
2134 if (GET_CODE (addr
) == PLUS
)
2136 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
2138 /* Check first to see if this is a constant offset from a @GOTOFF
2139 symbol reference. */
2140 if ((GET_CODE (op0
) == LABEL_REF
2141 || (GET_CODE (op0
) == SYMBOL_REF
2142 && (CONSTANT_POOL_ADDRESS_P (op0
)
2143 || SYMBOL_REF_FLAG (op0
))))
2144 && GET_CODE (op1
) == CONST_INT
)
2146 current_function_uses_pic_offset_table
= 1;
2147 new = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, op0
), 7);
2148 new = gen_rtx_PLUS (VOIDmode
, new, op1
);
2149 new = gen_rtx_CONST (VOIDmode
, new);
2150 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
2154 emit_move_insn (reg
, new);
2160 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
2161 new = legitimize_pic_address (XEXP (addr
, 1),
2162 base
== reg
? NULL_RTX
: reg
);
2164 if (GET_CODE (new) == CONST_INT
)
2165 new = plus_constant (base
, INTVAL (new));
2168 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
2170 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
2171 new = XEXP (new, 1);
2173 new = gen_rtx_PLUS (Pmode
, base
, new);
2181 /* Try machine-dependent ways of modifying an illegitimate address
2182 to be legitimate. If we find one, return the new, valid address.
2183 This macro is used in only one place: `memory_address' in explow.c.
2185 OLDX is the address as it was before break_out_memory_refs was called.
2186 In some cases it is useful to look at this to decide what needs to be done.
2188 MODE and WIN are passed so that this macro can use
2189 GO_IF_LEGITIMATE_ADDRESS.
2191 It is always safe for this macro to do nothing. It exists to recognize
2192 opportunities to optimize the output.
2194 For the 80386, we handle X+REG by loading X into a register R and
2195 using R+REG. R will go in a general reg and indexing will be used.
2196 However, if REG is a broken-out memory address or multiplication,
2197 nothing needs to be done because REG can certainly go in a general reg.
2199 When -fpic is used, special handling is needed for symbolic references.
2200 See comments by legitimize_pic_address in i386.c for details. */
2203 legitimize_address (x
, oldx
, mode
)
2205 register rtx oldx ATTRIBUTE_UNUSED
;
2206 enum machine_mode mode
;
2211 if (TARGET_DEBUG_ADDR
)
2213 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2214 GET_MODE_NAME (mode
));
2218 if (flag_pic
&& SYMBOLIC_CONST (x
))
2219 return legitimize_pic_address (x
, 0);
2221 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
2222 if (GET_CODE (x
) == ASHIFT
2223 && GET_CODE (XEXP (x
, 1)) == CONST_INT
2224 && (log
= (unsigned)exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
2227 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
2228 GEN_INT (1 << log
));
2231 if (GET_CODE (x
) == PLUS
)
2233 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2235 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
2236 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
2237 && (log
= (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
2240 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
2241 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
2242 GEN_INT (1 << log
));
2245 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
2246 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
2247 && (log
= (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
2250 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
2251 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
2252 GEN_INT (1 << log
));
2255 /* Put multiply first if it isn't already. */
2256 if (GET_CODE (XEXP (x
, 1)) == MULT
)
2258 rtx tmp
= XEXP (x
, 0);
2259 XEXP (x
, 0) = XEXP (x
, 1);
2264 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2265 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2266 created by virtual register instantiation, register elimination, and
2267 similar optimizations. */
2268 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
2271 x
= gen_rtx_PLUS (Pmode
,
2272 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
2273 XEXP (XEXP (x
, 1), 0)),
2274 XEXP (XEXP (x
, 1), 1));
2278 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
2279 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2280 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
2281 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
2282 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
2283 && CONSTANT_P (XEXP (x
, 1)))
2286 rtx other
= NULL_RTX
;
2288 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
2290 constant
= XEXP (x
, 1);
2291 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
2293 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
2295 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
2296 other
= XEXP (x
, 1);
2304 x
= gen_rtx_PLUS (Pmode
,
2305 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
2306 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
2307 plus_constant (other
, INTVAL (constant
)));
2311 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
2314 if (GET_CODE (XEXP (x
, 0)) == MULT
)
2317 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
2320 if (GET_CODE (XEXP (x
, 1)) == MULT
)
2323 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
2327 && GET_CODE (XEXP (x
, 1)) == REG
2328 && GET_CODE (XEXP (x
, 0)) == REG
)
2331 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
2334 x
= legitimize_pic_address (x
, 0);
2337 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
2340 if (GET_CODE (XEXP (x
, 0)) == REG
)
2342 register rtx temp
= gen_reg_rtx (Pmode
);
2343 register rtx val
= force_operand (XEXP (x
, 1), temp
);
2345 emit_move_insn (temp
, val
);
2351 else if (GET_CODE (XEXP (x
, 1)) == REG
)
2353 register rtx temp
= gen_reg_rtx (Pmode
);
2354 register rtx val
= force_operand (XEXP (x
, 0), temp
);
2356 emit_move_insn (temp
, val
);
2366 /* Print an integer constant expression in assembler syntax. Addition
2367 and subtraction are the only arithmetic that may appear in these
2368 expressions. FILE is the stdio stream to write to, X is the rtx, and
2369 CODE is the operand print code from the output string. */
2372 output_pic_addr_const (file
, x
, code
)
2379 switch (GET_CODE (x
))
2389 assemble_name (file
, XSTR (x
, 0));
2390 if (code
== 'P' && ! SYMBOL_REF_FLAG (x
))
2391 fputs ("@PLT", file
);
2398 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
2399 assemble_name (asm_out_file
, buf
);
2403 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
2407 /* This used to output parentheses around the expression,
2408 but that does not work on the 386 (either ATT or BSD assembler). */
2409 output_pic_addr_const (file
, XEXP (x
, 0), code
);
2413 if (GET_MODE (x
) == VOIDmode
)
2415 /* We can use %d if the number is <32 bits and positive. */
2416 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
2417 fprintf (file
, "0x%lx%08lx",
2418 (unsigned long) CONST_DOUBLE_HIGH (x
),
2419 (unsigned long) CONST_DOUBLE_LOW (x
));
2421 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
2424 /* We can't handle floating point constants;
2425 PRINT_OPERAND must handle them. */
2426 output_operand_lossage ("floating constant misused");
2430 /* Some assemblers need integer constants to appear first. */
2431 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
2433 output_pic_addr_const (file
, XEXP (x
, 0), code
);
2435 output_pic_addr_const (file
, XEXP (x
, 1), code
);
2437 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
2439 output_pic_addr_const (file
, XEXP (x
, 1), code
);
2441 output_pic_addr_const (file
, XEXP (x
, 0), code
);
2448 putc (ASSEMBLER_DIALECT
? '(' : '[', file
);
2449 output_pic_addr_const (file
, XEXP (x
, 0), code
);
2451 output_pic_addr_const (file
, XEXP (x
, 1), code
);
2452 putc (ASSEMBLER_DIALECT
? ')' : ']', file
);
2456 if (XVECLEN (x
, 0) != 1)
2458 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
2459 switch (XINT (x
, 1))
2462 fputs ("@GOT", file
);
2465 fputs ("@GOTOFF", file
);
2468 fputs ("@PLT", file
);
2471 output_operand_lossage ("invalid UNSPEC as operand");
2477 output_operand_lossage ("invalid expression as operand");
2482 put_condition_code (code
, mode
, reverse
, fp
, file
)
2484 enum machine_mode mode
;
2491 code
= reverse_condition (code
);
2502 if (mode
== CCNOmode
)
2507 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
2508 Those same assemblers have the same but opposite losage on cmov. */
2509 suffix
= fp
? "nbe" : "a";
2512 if (mode
== CCNOmode
)
2521 if (mode
== CCNOmode
)
2528 suffix
= fp
? "nb" : "ae";
2531 if (mode
== CCNOmode
)
2541 fputs (suffix
, file
);
2545 print_reg (x
, code
, file
)
2550 if (REGNO (x
) == ARG_POINTER_REGNUM
2551 || REGNO (x
) == FLAGS_REG
2552 || REGNO (x
) == FPSR_REG
)
2555 if (ASSEMBLER_DIALECT
== 0 || USER_LABEL_PREFIX
[0] == 0)
2560 else if (code
== 'b')
2562 else if (code
== 'k')
2564 else if (code
== 'y')
2566 else if (code
== 'h')
2569 code
= GET_MODE_SIZE (GET_MODE (x
));
2574 if (STACK_TOP_P (x
))
2576 fputs ("st(0)", file
);
2587 fputs (hi_reg_name
[REGNO (x
)], file
);
2590 fputs (qi_reg_name
[REGNO (x
)], file
);
2593 fputs (qi_high_reg_name
[REGNO (x
)], file
);
2601 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
2602 C -- print opcode suffix for set/cmov insn.
2603 c -- like C, but print reversed condition
2604 R -- print the prefix for register names.
2605 z -- print the opcode suffix for the size of the current operand.
2606 * -- print a star (in certain assembler syntax)
2607 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2608 s -- print a shift double count, followed by the assemblers argument
2610 b -- print the QImode name of the register for the indicated operand.
2611 %b0 would print %al if operands[0] is reg 0.
2612 w -- likewise, print the HImode name of the register.
2613 k -- likewise, print the SImode name of the register.
2614 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
2615 y -- print "st(0)" instead of "st" as a register. */
2618 print_operand (file
, x
, code
)
2628 if (ASSEMBLER_DIALECT
== 0)
2633 if (ASSEMBLER_DIALECT
== 0)
2638 if (ASSEMBLER_DIALECT
== 0)
2643 if (ASSEMBLER_DIALECT
== 0)
2648 if (ASSEMBLER_DIALECT
== 0)
2653 if (ASSEMBLER_DIALECT
== 0)
2658 if (ASSEMBLER_DIALECT
== 0)
2663 /* 387 opcodes don't get size suffixes if the operands are
2666 if (STACK_REG_P (x
))
2669 /* Intel syntax has no truck with instruction suffixes. */
2670 if (ASSEMBLER_DIALECT
!= 0)
2673 /* this is the size of op from size of operand */
2674 switch (GET_MODE_SIZE (GET_MODE (x
)))
2685 if (GET_MODE (x
) == SFmode
)
2699 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
2701 #ifdef GAS_MNEMONICS
2723 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
2725 PRINT_OPERAND (file
, x
, 0);
2731 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
2734 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
2737 /* Like above, but reverse condition */
2739 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
2742 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
2748 sprintf (str
, "invalid operand code `%c'", code
);
2749 output_operand_lossage (str
);
2754 if (GET_CODE (x
) == REG
)
2756 PRINT_REG (x
, code
, file
);
2759 else if (GET_CODE (x
) == MEM
)
2761 /* No `byte ptr' prefix for call instructions. */
2762 if (ASSEMBLER_DIALECT
!= 0 && code
!= 'X' && code
!= 'P')
2765 switch (GET_MODE_SIZE (GET_MODE (x
)))
2767 case 1: size
= "BYTE"; break;
2768 case 2: size
= "WORD"; break;
2769 case 4: size
= "DWORD"; break;
2770 case 8: size
= "QWORD"; break;
2771 case 12: size
= "XWORD"; break;
2776 fputs (" PTR ", file
);
2780 if (flag_pic
&& CONSTANT_ADDRESS_P (x
))
2781 output_pic_addr_const (file
, x
, code
);
2786 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
2791 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
2792 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
2794 if (ASSEMBLER_DIALECT
== 0)
2796 fprintf (file
, "0x%lx", l
);
2799 /* These float cases don't actually occur as immediate operands. */
2800 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
2805 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
2806 REAL_VALUE_TO_DECIMAL (r
, "%.22e", dstr
);
2807 fprintf (file
, "%s", dstr
);
2810 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == XFmode
)
2815 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
2816 REAL_VALUE_TO_DECIMAL (r
, "%.22e", dstr
);
2817 fprintf (file
, "%s", dstr
);
2823 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
2825 if (ASSEMBLER_DIALECT
== 0)
2828 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
2829 || GET_CODE (x
) == LABEL_REF
)
2831 if (ASSEMBLER_DIALECT
== 0)
2834 fputs ("OFFSET FLAT:", file
);
2837 if (GET_CODE (x
) == CONST_INT
)
2838 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
2840 output_pic_addr_const (file
, x
, code
);
2842 output_addr_const (file
, x
);
2846 /* Print a memory operand whose address is ADDR. */
2849 print_operand_address (file
, addr
)
2853 struct ix86_address parts
;
2854 rtx base
, index
, disp
;
2857 if (! ix86_decompose_address (addr
, &parts
))
2861 index
= parts
.index
;
2863 scale
= parts
.scale
;
2865 if (!base
&& !index
)
2867 /* Displacement only requires special attention. */
2869 if (GET_CODE (disp
) == CONST_INT
)
2871 if (ASSEMBLER_DIALECT
!= 0)
2872 fputs ("ds:", file
);
2873 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (addr
));
2876 output_pic_addr_const (file
, addr
, 0);
2878 output_addr_const (file
, addr
);
2882 if (ASSEMBLER_DIALECT
== 0)
2887 output_pic_addr_const (file
, disp
, 0);
2888 else if (GET_CODE (disp
) == LABEL_REF
)
2889 output_asm_label (disp
);
2891 output_addr_const (file
, disp
);
2896 PRINT_REG (base
, 0, file
);
2900 PRINT_REG (index
, 0, file
);
2902 fprintf (file
, ",%d", scale
);
2908 rtx offset
= NULL_RTX
;
2912 /* Pull out the offset of a symbol; print any symbol itself. */
2913 if (GET_CODE (disp
) == CONST
2914 && GET_CODE (XEXP (disp
, 0)) == PLUS
2915 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
2917 offset
= XEXP (XEXP (disp
, 0), 1);
2918 disp
= gen_rtx_CONST (VOIDmode
,
2919 XEXP (XEXP (disp
, 0), 0));
2923 output_pic_addr_const (file
, disp
, 0);
2924 else if (GET_CODE (disp
) == LABEL_REF
)
2925 output_asm_label (disp
);
2926 else if (GET_CODE (disp
) == CONST_INT
)
2929 output_addr_const (file
, disp
);
2935 PRINT_REG (base
, 0, file
);
2938 if (INTVAL (offset
) >= 0)
2940 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
2944 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
2951 PRINT_REG (index
, 0, file
);
2953 fprintf (file
, "*%d", scale
);
2960 /* Split one or more DImode RTL references into pairs of SImode
2961 references. The RTL can be REG, offsettable MEM, integer constant, or
2962 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
2963 split and "num" is its length. lo_half and hi_half are output arrays
2964 that parallel "operands". */
2967 split_di (operands
, num
, lo_half
, hi_half
)
2970 rtx lo_half
[], hi_half
[];
2974 rtx op
= operands
[num
];
2975 if (CONSTANT_P (op
))
2976 split_double (op
, &lo_half
[num
], &hi_half
[num
]);
2977 else if (! reload_completed
)
2979 lo_half
[num
] = gen_lowpart (SImode
, op
);
2980 hi_half
[num
] = gen_highpart (SImode
, op
);
2982 else if (GET_CODE (op
) == REG
)
2984 lo_half
[num
] = gen_rtx_REG (SImode
, REGNO (op
));
2985 hi_half
[num
] = gen_rtx_REG (SImode
, REGNO (op
) + 1);
2987 else if (offsettable_memref_p (op
))
2989 rtx lo_addr
= XEXP (op
, 0);
2990 rtx hi_addr
= XEXP (adj_offsettable_operand (op
, 4), 0);
2991 lo_half
[num
] = change_address (op
, SImode
, lo_addr
);
2992 hi_half
[num
] = change_address (op
, SImode
, hi_addr
);
2999 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
3000 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3001 is the expression of the binary operation. The output may either be
3002 emitted here, or returned to the caller, like all output_* functions.
3004 There is no guarantee that the operands are the same mode, as they
3005 might be within FLOAT or FLOAT_EXTEND expressions. */
3008 output_387_binary_op (insn
, operands
)
3012 static char buf
[100];
3016 switch (GET_CODE (operands
[3]))
3019 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
3020 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
3027 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
3028 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
3035 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
3036 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
3043 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
3044 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
3056 switch (GET_CODE (operands
[3]))
3060 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
3063 operands
[2] = operands
[1];
3067 if (GET_CODE (operands
[2]) == MEM
)
3073 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
3075 if (STACK_TOP_P (operands
[0]))
3076 p
= "p\t{%0,%2|%2, %0}";
3078 p
= "p\t{%2,%0|%0, %2}";
3082 if (STACK_TOP_P (operands
[0]))
3083 p
= "\t{%y2,%0|%0, %y2}";
3085 p
= "\t{%2,%0|%0, %2}";
3090 if (GET_CODE (operands
[1]) == MEM
)
3096 if (GET_CODE (operands
[2]) == MEM
)
3102 if (! STACK_REG_P (operands
[1]) || ! STACK_REG_P (operands
[2]))
3105 /* Note that the Unixware assembler, and the AT&T assembler before
3106 that, are confusingly not reversed from Intel syntax in this
3108 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
3110 if (STACK_TOP_P (operands
[0]))
3117 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
3119 if (STACK_TOP_P (operands
[0]))
3126 if (STACK_TOP_P (operands
[0]))
3128 if (STACK_TOP_P (operands
[1]))
3134 else if (STACK_TOP_P (operands
[1]))
3148 /* Output code for INSN to convert a float to a signed int. OPERANDS
3149 are the insn operands. The output may be [SD]Imode and the input
3150 operand may be [SDX]Fmode. */
3153 output_fix_trunc (insn
, operands
)
3157 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
3158 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
3161 /* Jump through a hoop or two for DImode, since the hardware has no
3162 non-popping instruction. We used to do this a different way, but
3163 that was somewhat fragile and broke with post-reload splitters. */
3164 if (dimode_p
&& !stack_top_dies
)
3165 output_asm_insn ("fld\t%y1", operands
);
3167 if (! STACK_TOP_P (operands
[1]))
3170 xops
[0] = GEN_INT (12);
3171 xops
[1] = adj_offsettable_operand (operands
[2], 1);
3172 xops
[1] = change_address (xops
[1], QImode
, NULL_RTX
);
3174 xops
[2] = operands
[0];
3175 if (GET_CODE (operands
[0]) != MEM
)
3176 xops
[2] = operands
[3];
3178 output_asm_insn ("fnstcw\t%2", operands
);
3179 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands
);
3180 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops
);
3181 output_asm_insn ("fldcw\t%2", operands
);
3182 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands
);
3184 if (stack_top_dies
|| dimode_p
)
3185 output_asm_insn ("fistp%z2\t%2", xops
);
3187 output_asm_insn ("fist%z2\t%2", xops
);
3189 output_asm_insn ("fldcw\t%2", operands
);
3191 if (GET_CODE (operands
[0]) != MEM
)
3195 split_di (operands
+0, 1, xops
+0, xops
+1);
3196 split_di (operands
+3, 1, xops
+2, xops
+3);
3197 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
3198 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops
);
3201 output_asm_insn ("mov{l}\t{%3,%0|%0, %3}", operands
);
3207 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
3208 should be used and 2 when fnstsw should be used. UNORDERED_P is true
3209 when fucom should be used. */
3212 output_fp_compare (insn
, operands
, eflags_p
, unordered_p
)
3215 int eflags_p
, unordered_p
;
3218 rtx cmp_op0
= operands
[0];
3219 rtx cmp_op1
= operands
[1];
3224 cmp_op1
= operands
[2];
3227 if (! STACK_TOP_P (cmp_op0
))
3230 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
3232 if (STACK_REG_P (cmp_op1
)
3234 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
3235 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
3237 /* If both the top of the 387 stack dies, and the other operand
3238 is also a stack register that dies, then this must be a
3239 `fcompp' float compare */
3243 /* There is no double popping fcomi variant. Fortunately,
3244 eflags is immune from the fstp's cc clobbering. */
3246 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
3248 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
3256 return "fucompp\n\tfnstsw\t%0";
3258 return "fcompp\n\tfnstsw\t%0";
3271 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
3273 static char * const alt
[24] =
3285 "fcomi\t{%y1, %0|%0, %y1}",
3286 "fcomip\t{%y1, %0|%0, %y1}",
3287 "fucomi\t{%y1, %0|%0, %y1}",
3288 "fucomip\t{%y1, %0|%0, %y1}",
3295 "fcom%z2\t%y2\n\tfnstsw\t%0",
3296 "fcomp%z2\t%y2\n\tfnstsw\t%0",
3297 "fucom%z2\t%y2\n\tfnstsw\t%0",
3298 "fucomp%z2\t%y2\n\tfnstsw\t%0",
3300 "ficom%z2\t%y2\n\tfnstsw\t%0",
3301 "ficomp%z2\t%y2\n\tfnstsw\t%0",
3309 mask
= eflags_p
<< 3;
3310 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
3311 mask
|= unordered_p
<< 1;
3312 mask
|= stack_top_dies
;
3324 /* Output assembler code to FILE to initialize basic-block profiling.
3326 If profile_block_flag == 2
3328 Output code to call the subroutine `__bb_init_trace_func'
3329 and pass two parameters to it. The first parameter is
3330 the address of a block allocated in the object module.
3331 The second parameter is the number of the first basic block
3334 The name of the block is a local symbol made with this statement:
3336 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
3338 Of course, since you are writing the definition of
3339 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3340 can take a short cut in the definition of this macro and use the
3341 name that you know will result.
3343 The number of the first basic block of the function is
3344 passed to the macro in BLOCK_OR_LABEL.
3346 If described in a virtual assembler language the code to be
3350 parameter2 <- BLOCK_OR_LABEL
3351 call __bb_init_trace_func
3353 else if profile_block_flag != 0
3355 Output code to call the subroutine `__bb_init_func'
3356 and pass one single parameter to it, which is the same
3357 as the first parameter to `__bb_init_trace_func'.
3359 The first word of this parameter is a flag which will be nonzero if
3360 the object module has already been initialized. So test this word
3361 first, and do not call `__bb_init_func' if the flag is nonzero.
3362 Note: When profile_block_flag == 2 the test need not be done
3363 but `__bb_init_trace_func' *must* be called.
3365 BLOCK_OR_LABEL may be used to generate a label number as a
3366 branch destination in case `__bb_init_func' will not be called.
3368 If described in a virtual assembler language the code to be
3379 ix86_output_function_block_profiler (file
, block_or_label
)
3383 static int num_func
= 0;
3385 char block_table
[80], false_label
[80];
3387 ASM_GENERATE_INTERNAL_LABEL (block_table
, "LPBX", 0);
3389 xops
[1] = gen_rtx_SYMBOL_REF (VOIDmode
, block_table
);
3390 xops
[5] = stack_pointer_rtx
;
3391 xops
[7] = gen_rtx_REG (Pmode
, 0); /* eax */
3393 CONSTANT_POOL_ADDRESS_P (xops
[1]) = TRUE
;
3395 switch (profile_block_flag
)
3398 xops
[2] = GEN_INT (block_or_label
);
3399 xops
[3] = gen_rtx_MEM (Pmode
,
3400 gen_rtx_SYMBOL_REF (VOIDmode
, "__bb_init_trace_func"));
3401 xops
[6] = GEN_INT (8);
3403 output_asm_insn ("push{l}\t%2", xops
);
3405 output_asm_insn ("push{l}\t%1", xops
);
3408 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops
);
3409 output_asm_insn ("push{l}\t%7", xops
);
3411 output_asm_insn ("call\t%P3", xops
);
3412 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops
);
3416 ASM_GENERATE_INTERNAL_LABEL (false_label
, "LPBZ", num_func
);
3418 xops
[0] = const0_rtx
;
3419 xops
[2] = gen_rtx_MEM (Pmode
,
3420 gen_rtx_SYMBOL_REF (VOIDmode
, false_label
));
3421 xops
[3] = gen_rtx_MEM (Pmode
,
3422 gen_rtx_SYMBOL_REF (VOIDmode
, "__bb_init_func"));
3423 xops
[4] = gen_rtx_MEM (Pmode
, xops
[1]);
3424 xops
[6] = GEN_INT (4);
3426 CONSTANT_POOL_ADDRESS_P (xops
[2]) = TRUE
;
3428 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops
);
3429 output_asm_insn ("jne\t%2", xops
);
3432 output_asm_insn ("push{l}\t%1", xops
);
3435 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops
);
3436 output_asm_insn ("push{l}\t%7", xops
);
3438 output_asm_insn ("call\t%P3", xops
);
3439 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops
);
3440 ASM_OUTPUT_INTERNAL_LABEL (file
, "LPBZ", num_func
);
3446 /* Output assembler code to FILE to increment a counter associated
3447 with basic block number BLOCKNO.
3449 If profile_block_flag == 2
3451 Output code to initialize the global structure `__bb' and
3452 call the function `__bb_trace_func' which will increment the
3455 `__bb' consists of two words. In the first word the number
3456 of the basic block has to be stored. In the second word
3457 the address of a block allocated in the object module
3460 The basic block number is given by BLOCKNO.
3462 The address of the block is given by the label created with
3464 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
3466 by FUNCTION_BLOCK_PROFILER.
3468 Of course, since you are writing the definition of
3469 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3470 can take a short cut in the definition of this macro and use the
3471 name that you know will result.
3473 If described in a virtual assembler language the code to be
3476 move BLOCKNO -> (__bb)
3477 move LPBX0 -> (__bb+4)
3478 call __bb_trace_func
3480 Note that function `__bb_trace_func' must not change the
3481 machine state, especially the flag register. To grant
3482 this, you must output code to save and restore registers
3483 either in this macro or in the macros MACHINE_STATE_SAVE
3484 and MACHINE_STATE_RESTORE. The last two macros will be
3485 used in the function `__bb_trace_func', so you must make
3486 sure that the function prologue does not change any
3487 register prior to saving it with MACHINE_STATE_SAVE.
3489 else if profile_block_flag != 0
3491 Output code to increment the counter directly.
3492 Basic blocks are numbered separately from zero within each
3493 compiled object module. The count associated with block number
3494 BLOCKNO is at index BLOCKNO in an array of words; the name of
3495 this array is a local symbol made with this statement:
3497 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
3499 Of course, since you are writing the definition of
3500 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3501 can take a short cut in the definition of this macro and use the
3502 name that you know will result.
3504 If described in a virtual assembler language the code to be
3507 inc (LPBX2+4*BLOCKNO)
3511 ix86_output_block_profiler (file
, blockno
)
3512 FILE *file ATTRIBUTE_UNUSED
;
3515 rtx xops
[8], cnt_rtx
;
3517 char *block_table
= counts
;
3519 switch (profile_block_flag
)
3522 ASM_GENERATE_INTERNAL_LABEL (block_table
, "LPBX", 0);
3524 xops
[1] = gen_rtx_SYMBOL_REF (VOIDmode
, block_table
);
3525 xops
[2] = GEN_INT (blockno
);
3526 xops
[3] = gen_rtx_MEM (Pmode
,
3527 gen_rtx_SYMBOL_REF (VOIDmode
, "__bb_trace_func"));
3528 xops
[4] = gen_rtx_SYMBOL_REF (VOIDmode
, "__bb");
3529 xops
[5] = plus_constant (xops
[4], 4);
3530 xops
[0] = gen_rtx_MEM (SImode
, xops
[4]);
3531 xops
[6] = gen_rtx_MEM (SImode
, xops
[5]);
3533 CONSTANT_POOL_ADDRESS_P (xops
[1]) = TRUE
;
3535 output_asm_insn ("pushf", xops
);
3536 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
3539 xops
[7] = gen_rtx_REG (Pmode
, 0); /* eax */
3540 output_asm_insn ("push{l}\t%7", xops
);
3541 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops
);
3542 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops
);
3543 output_asm_insn ("pop{l}\t%7", xops
);
3546 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops
);
3547 output_asm_insn ("call\t%P3", xops
);
3548 output_asm_insn ("popf", xops
);
3553 ASM_GENERATE_INTERNAL_LABEL (counts
, "LPBX", 2);
3554 cnt_rtx
= gen_rtx_SYMBOL_REF (VOIDmode
, counts
);
3555 SYMBOL_REF_FLAG (cnt_rtx
) = TRUE
;
3558 cnt_rtx
= plus_constant (cnt_rtx
, blockno
*4);
3561 cnt_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, cnt_rtx
);
3563 xops
[0] = gen_rtx_MEM (SImode
, cnt_rtx
);
3564 output_asm_insn ("inc{l}\t%0", xops
);
3571 ix86_expand_move (mode
, operands
)
3572 enum machine_mode mode
;
3575 int strict
= (reload_in_progress
|| reload_completed
);
3576 int want_clobber
= 0;
3579 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (operands
[1], Pmode
))
3581 /* Emit insns to move operands[1] into operands[0]. */
3583 if (GET_CODE (operands
[0]) == MEM
)
3584 operands
[1] = force_reg (Pmode
, operands
[1]);
3587 rtx temp
= operands
[0];
3588 if (GET_CODE (temp
) != REG
)
3589 temp
= gen_reg_rtx (Pmode
);
3590 temp
= legitimize_pic_address (operands
[1], temp
);
3591 if (temp
== operands
[0])
3598 if (GET_CODE (operands
[0]) == MEM
&& GET_CODE (operands
[1]) == MEM
)
3599 operands
[1] = force_reg (mode
, operands
[1]);
3601 if (FLOAT_MODE_P (mode
))
3603 /* If we are loading a floating point constant that isn't 0 or 1
3604 into a register, force the value to memory now, since we'll
3605 get better code out the back end. */
3609 else if (GET_CODE (operands
[0]) == MEM
)
3610 operands
[1] = force_reg (mode
, operands
[1]);
3611 else if (GET_CODE (operands
[1]) == CONST_DOUBLE
3612 && ! standard_80387_constant_p (operands
[1]))
3613 operands
[1] = validize_mem (force_const_mem (mode
, operands
[1]));
3617 /* Try to guess when a cc clobber on the move might be fruitful. */
3619 && GET_CODE (operands
[0]) == REG
3620 && operands
[1] == const0_rtx
3626 insn
= gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]);
3630 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
3631 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, insn
, clob
));
3637 /* Attempt to expand a binary operator. Make the expansion closer to the
3638 actual machine, then just general_operand, which will allow 3 separate
3639 memory references (one output, two input) in a single insn. Return
3640 whether the insn fails, or succeeds. */
3643 ix86_expand_binary_operator (code
, mode
, operands
)
3645 enum machine_mode mode
;
3648 int matching_memory
;
3649 rtx src1
, src2
, dst
, op
, clob
;
3655 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
3656 if (GET_RTX_CLASS (code
) == 'c'
3657 && (rtx_equal_p (dst
, src2
)
3658 || immediate_operand (src1
, mode
)))
3665 /* If the destination is memory, and we do not have matching source
3666 operands, do things in registers. */
3667 matching_memory
= 0;
3668 if (GET_CODE (dst
) == MEM
)
3670 if (rtx_equal_p (dst
, src1
))
3671 matching_memory
= 1;
3672 else if (GET_RTX_CLASS (code
) == 'c'
3673 && rtx_equal_p (dst
, src2
))
3674 matching_memory
= 2;
3676 dst
= gen_reg_rtx (mode
);
3679 /* Both source operands cannot be in memory. */
3680 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
3682 if (matching_memory
!= 2)
3683 src2
= force_reg (mode
, src2
);
3685 src1
= force_reg (mode
, src1
);
3688 /* If the operation is not commutable, source 1 cannot be a constant. */
3689 if (CONSTANT_P (src1
) && GET_RTX_CLASS (code
) != 'c')
3690 src1
= force_reg (mode
, src1
);
3692 /* If optimizing, copy to regs to improve CSE */
3693 if (optimize
&& !reload_in_progress
&& !reload_completed
)
3695 if (GET_CODE (dst
) == MEM
)
3696 dst
= gen_reg_rtx (mode
);
3697 if (GET_CODE (src1
) == MEM
)
3698 src1
= force_reg (mode
, src1
);
3699 if (GET_CODE (src2
) == MEM
)
3700 src2
= force_reg (mode
, src2
);
3703 /* Emit the instruction. */
3705 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
3706 if (reload_in_progress
)
3708 /* Reload doesn't know about the flags register, and doesn't know that
3709 it doesn't want to clobber it. We can only do this with PLUS. */
3716 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
3717 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
3720 /* Fix up the destination if needed. */
3721 if (dst
!= operands
[0])
3722 emit_move_insn (operands
[0], dst
);
3725 /* Return TRUE or FALSE depending on whether the binary operator meets the
3726 appropriate constraints. */
3729 ix86_binary_operator_ok (code
, mode
, operands
)
3731 enum machine_mode mode ATTRIBUTE_UNUSED
;
3734 /* Both source operands cannot be in memory. */
3735 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
3737 /* If the operation is not commutable, source 1 cannot be a constant. */
3738 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != 'c')
3740 /* If the destination is memory, we must have a matching source operand. */
3741 if (GET_CODE (operands
[0]) == MEM
3742 && ! (rtx_equal_p (operands
[0], operands
[1])
3743 || (GET_RTX_CLASS (code
) == 'c'
3744 && rtx_equal_p (operands
[0], operands
[2]))))
3749 /* Attempt to expand a unary operator. Make the expansion closer to the
3750 actual machine, then just general_operand, which will allow 2 separate
3751 memory references (one output, one input) in a single insn. Return
3752 whether the insn fails, or succeeds. */
3755 ix86_expand_unary_operator (code
, mode
, operands
)
3757 enum machine_mode mode
;
3760 /* If optimizing, copy to regs to improve CSE */
3762 && ((reload_in_progress
| reload_completed
) == 0)
3763 && GET_CODE (operands
[1]) == MEM
)
3764 operands
[1] = force_reg (GET_MODE (operands
[1]), operands
[1]);
3766 if (! ix86_unary_operator_ok (code
, mode
, operands
))
3769 && ((reload_in_progress
| reload_completed
) == 0)
3770 && GET_CODE (operands
[1]) == MEM
)
3772 operands
[1] = force_reg (GET_MODE (operands
[1]), operands
[1]);
3773 if (! ix86_unary_operator_ok (code
, mode
, operands
))
3783 /* Return TRUE or FALSE depending on whether the unary operator meets the
3784 appropriate constraints. */
3787 ix86_unary_operator_ok (code
, mode
, operands
)
3788 enum rtx_code code ATTRIBUTE_UNUSED
;
3789 enum machine_mode mode ATTRIBUTE_UNUSED
;
3790 rtx operands
[2] ATTRIBUTE_UNUSED
;
3795 /* Produce an unsigned comparison for a given signed comparison. */
3797 static enum rtx_code
3798 unsigned_comparison (code
)
3828 /* Generate insn patterns to do an integer compare of OPERANDS. */
3831 ix86_expand_int_compare (code
, op0
, op1
)
3835 enum machine_mode cmpmode
;
3838 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
3839 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
3841 /* This is very simple, but making the interface the same as in the
3842 FP case makes the rest of the code easier. */
3843 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
3844 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
3846 /* Return the test that should be put into the flags user, i.e.
3847 the bcc, scc, or cmov instruction. */
3848 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
3851 /* Generate insn patterns to do a floating point compare of OPERANDS.
3852 If UNORDERED, allow for unordered compares. */
3855 ix86_expand_fp_compare (code
, op0
, op1
, unordered
)
3860 enum machine_mode fpcmp_mode
;
3861 enum machine_mode intcmp_mode
;
3864 /* When not doing IEEE compliant compares, disable unordered. */
3865 if (! TARGET_IEEE_FP
)
3867 fpcmp_mode
= unordered
? CCFPUmode
: CCFPmode
;
3869 /* ??? If we knew whether invalid-operand exceptions were masked,
3870 we could rely on fcom to raise an exception and take care of
3871 NaNs. But we don't. We could know this from c9x math bits. */
3875 /* All of the unordered compare instructions only work on registers.
3876 The same is true of the XFmode compare instructions. */
3877 if (unordered
|| GET_MODE (op0
) == XFmode
)
3879 op0
= force_reg (GET_MODE (op0
), op0
);
3880 op1
= force_reg (GET_MODE (op1
), op1
);
3884 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
3885 things around if they appear profitable, otherwise force op0
3888 if (standard_80387_constant_p (op0
) == 0
3889 || (GET_CODE (op0
) == MEM
3890 && ! (standard_80387_constant_p (op1
) == 0
3891 || GET_CODE (op1
) == MEM
)))
3894 tmp
= op0
, op0
= op1
, op1
= tmp
;
3895 code
= swap_condition (code
);
3898 if (GET_CODE (op0
) != REG
)
3899 op0
= force_reg (GET_MODE (op0
), op0
);
3901 if (CONSTANT_P (op1
))
3903 if (standard_80387_constant_p (op1
))
3904 op1
= force_reg (GET_MODE (op1
), op1
);
3906 op1
= validize_mem (force_const_mem (GET_MODE (op1
), op1
));
3910 /* %%% fcomi is probably always faster, even when dealing with memory,
3911 since compare-and-branch would be three insns instead of four. */
3912 if (TARGET_CMOVE
&& !unordered
)
3914 if (GET_CODE (op0
) != REG
)
3915 op0
= force_reg (GET_MODE (op0
), op0
);
3916 if (GET_CODE (op1
) != REG
)
3917 op1
= force_reg (GET_MODE (op1
), op1
);
3919 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
3920 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
), tmp
);
3923 /* The FP codes work out to act like unsigned. */
3924 code
= unsigned_comparison (code
);
3925 intcmp_mode
= fpcmp_mode
;
3929 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
3932 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
3933 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), 9);
3934 tmp
= gen_reg_rtx (HImode
);
3935 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, tmp2
));
3939 /* We have two options here -- use sahf, or testing bits of ah
3940 directly. On PPRO, they are equivalent, sahf being one byte
3941 smaller. On Pentium, sahf is non-pairable while test is UV
3944 if (TARGET_USE_SAHF
|| optimize_size
)
3948 /* The FP codes work out to act like unsigned. */
3949 code
= unsigned_comparison (code
);
3950 emit_insn (gen_x86_sahf_1 (tmp
));
3951 intcmp_mode
= CCmode
;
3956 * The numbers below correspond to the bits of the FPSW in AH.
3957 * C3, C2, and C0 are in bits 0x40, 0x40, and 0x01 respectively.
3979 /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
3980 faster in all cases to just fall back on sahf. */
3998 emit_insn (gen_testqi_ext_0 (tmp
, GEN_INT (mask
)));
3999 intcmp_mode
= CCNOmode
;
4004 /* In the unordered case, we have to check C2 for NaN's, which
4005 doesn't happen to work out to anything nice combination-wise.
4006 So do some bit twiddling on the value we've got in AH to come
4007 up with an appropriate set of condition codes. */
4009 intcmp_mode
= CCNOmode
;
4013 emit_insn (gen_testqi_ext_0 (tmp
, GEN_INT (0x45)));
4017 emit_insn (gen_andqi_ext_0 (tmp
, tmp
, GEN_INT (0x45)));
4018 emit_insn (gen_cmpqi_ext_3 (tmp
, GEN_INT (0x01)));
4019 intcmp_mode
= CCmode
;
4023 emit_insn (gen_testqi_ext_0 (tmp
, GEN_INT (0x05)));
4027 emit_insn (gen_andqi_ext_0 (tmp
, tmp
, GEN_INT (0x45)));
4028 emit_insn (gen_addqi_ext_1 (tmp
, tmp
, constm1_rtx
));
4029 emit_insn (gen_cmpqi_ext_3 (tmp
, GEN_INT (0x40)));
4030 intcmp_mode
= CCmode
;
4034 emit_insn (gen_andqi_ext_0 (tmp
, tmp
, GEN_INT (0x45)));
4035 emit_insn (gen_cmpqi_ext_3 (tmp
, GEN_INT (0x40)));
4036 intcmp_mode
= CCmode
;
4040 emit_insn (gen_andqi_ext_0 (tmp
, tmp
, GEN_INT (0x45)));
4041 emit_insn (gen_xorqi_cc_ext_1 (tmp
, tmp
, GEN_INT (0x40)));
4050 /* Return the test that should be put into the flags user, i.e.
4051 the bcc, scc, or cmov instruction. */
4052 return gen_rtx_fmt_ee (code
, VOIDmode
,
4053 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
4058 ix86_expand_compare (code
, unordered
)
4063 op0
= ix86_compare_op0
;
4064 op1
= ix86_compare_op1
;
4066 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
4067 ret
= ix86_expand_fp_compare (code
, op0
, op1
, unordered
);
4069 ret
= ix86_expand_int_compare (code
, op0
, op1
);
4075 ix86_expand_branch (code
, unordered
, label
)
4080 rtx tmp
, lo
[2], hi
[2], label2
;
4081 enum rtx_code code1
, code2
, code3
;
4083 if (GET_MODE (ix86_compare_op0
) != DImode
)
4085 tmp
= ix86_expand_compare (code
, unordered
);
4086 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4087 gen_rtx_LABEL_REF (VOIDmode
, label
),
4089 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4093 /* Expand DImode branch into multiple compare+branch. */
4095 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
4097 tmp
= ix86_compare_op0
;
4098 ix86_compare_op0
= ix86_compare_op1
;
4099 ix86_compare_op1
= tmp
;
4100 code
= swap_condition (code
);
4102 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
4103 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
4105 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to avoid
4106 two branches. This costs one extra insn, so disable when optimizing
4109 if ((code
== EQ
|| code
== NE
)
4111 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
4116 if (hi
[1] != const0_rtx
)
4118 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
4119 NULL_RTX
, 0, OPTAB_WIDEN
);
4123 if (lo
[1] != const0_rtx
)
4125 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
4126 NULL_RTX
, 0, OPTAB_WIDEN
);
4129 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
4130 NULL_RTX
, 0, OPTAB_WIDEN
);
4132 ix86_compare_op0
= tmp
;
4133 ix86_compare_op1
= const0_rtx
;
4134 ix86_expand_branch (code
, unordered
, label
);
4138 /* Otherwise, if we are doing less-than, op1 is a constant and the
4139 low word is zero, then we can just examine the high word. */
4141 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
4142 && (code
== LT
|| code
== LTU
))
4144 ix86_compare_op0
= hi
[0];
4145 ix86_compare_op1
= hi
[1];
4146 ix86_expand_branch (code
, unordered
, label
);
4150 /* Otherwise, we need two or three jumps. */
4152 label2
= gen_label_rtx ();
4155 code2
= swap_condition (code
);
4156 code3
= unsigned_condition (code
);
4160 case LT
: case GT
: case LTU
: case GTU
:
4163 case LE
: code1
= LT
; code2
= GT
; break;
4164 case GE
: code1
= GT
; code2
= LT
; break;
4165 case LEU
: code1
= LTU
; code2
= GTU
; break;
4166 case GEU
: code1
= GTU
; code2
= LTU
; break;
4168 case EQ
: code1
= NIL
; code2
= NE
; break;
4169 case NE
: code2
= NIL
; break;
4177 * if (hi(a) < hi(b)) goto true;
4178 * if (hi(a) > hi(b)) goto false;
4179 * if (lo(a) < lo(b)) goto true;
4183 ix86_compare_op0
= hi
[0];
4184 ix86_compare_op1
= hi
[1];
4187 ix86_expand_branch (code1
, unordered
, label
);
4189 ix86_expand_branch (code2
, unordered
, label2
);
4191 ix86_compare_op0
= lo
[0];
4192 ix86_compare_op1
= lo
[1];
4193 ix86_expand_branch (code3
, unordered
, label
);
4196 emit_label (label2
);
4200 ix86_expand_setcc (code
, unordered
, dest
)
4208 if (GET_MODE (ix86_compare_op0
) == DImode
)
4209 return 0; /* FAIL */
4211 /* Three modes of generation:
4212 0 -- destination does not overlap compare sources:
4213 clear dest first, emit strict_low_part setcc.
4214 1 -- destination does overlap compare sources:
4215 emit subreg setcc, zero extend.
4216 2 -- destination is in QImode:
4221 /* %%% reload problems with in-out. Revisit. */
4224 if (GET_MODE (dest
) == QImode
)
4226 else if (reg_overlap_mentioned_p (dest
, ix86_compare_op0
)
4227 || reg_overlap_mentioned_p (dest
, ix86_compare_op0
))
4231 emit_move_insn (dest
, const0_rtx
);
4233 ret
= ix86_expand_compare (code
, unordered
);
4234 PUT_MODE (ret
, QImode
);
4239 tmp
= gen_lowpart (QImode
, dest
);
4240 tmp
= gen_rtx_STRICT_LOW_PART (VOIDmode
, tmp
);
4244 if (!cse_not_expected
)
4245 tmp
= gen_reg_rtx (QImode
);
4247 tmp
= gen_lowpart (QImode
, dest
);
4250 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
4256 tmp
= gen_rtx_ZERO_EXTEND (GET_MODE (dest
), tmp
);
4257 tmp
= gen_rtx_SET (VOIDmode
, dest
, tmp
);
4258 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
4259 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
4263 return 1; /* DONE */
4267 ix86_expand_int_movcc (operands
)
4270 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
4271 rtx compare_seq
, compare_op
;
4273 /* When the compare code is not LTU or GEU, we can not use sbbl case.
4274 In case comparsion is done with immediate, we can convert it to LTU or
4275 GEU by altering the integer. */
4277 if ((code
== LEU
|| code
== GTU
)
4278 && GET_CODE (ix86_compare_op1
) == CONST_INT
4279 && GET_MODE (operands
[0]) != HImode
4280 && (unsigned int)INTVAL (ix86_compare_op1
) != 0xffffffff
4281 && GET_CODE (operands
[2]) == CONST_INT
4282 && GET_CODE (operands
[3]) == CONST_INT
)
4288 ix86_compare_op1
= GEN_INT (INTVAL (ix86_compare_op1
) + 1);
4291 compare_op
= ix86_expand_compare (code
, code
== EQ
|| code
== NE
);
4292 compare_seq
= gen_sequence ();
4295 compare_code
= GET_CODE (compare_op
);
4297 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
4298 HImode insns, we'd be swallowed in word prefix ops. */
4300 if (GET_MODE (operands
[0]) != HImode
4301 && GET_CODE (operands
[2]) == CONST_INT
4302 && GET_CODE (operands
[3]) == CONST_INT
)
4304 rtx out
= operands
[0];
4305 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
4306 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
4309 if (compare_code
== LTU
|| compare_code
== GEU
)
4312 /* Detect overlap between destination and compare sources. */
4315 /* To simplify rest of code, restrict to the GEU case. */
4316 if (compare_code
== LTU
)
4321 compare_code
= reverse_condition (compare_code
);
4322 code
= reverse_condition (code
);
4326 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
4327 || reg_overlap_mentioned_p (out
, ix86_compare_op0
))
4328 tmp
= gen_reg_rtx (SImode
);
4330 emit_insn (compare_seq
);
4331 emit_insn (gen_x86_movsicc_0_m1 (tmp
));
4343 emit_insn (gen_addsi3 (out
, out
, GEN_INT (ct
)));
4354 emit_insn (gen_iorsi3 (out
, out
, GEN_INT (ct
)));
4356 else if (diff
== -1 && ct
)
4366 emit_insn (gen_one_cmplsi2 (tmp
, tmp
));
4368 emit_insn (gen_addsi3 (out
, out
, GEN_INT (cf
)));
4375 * andl cf - ct, dest
4380 emit_insn (gen_andsi3 (out
, out
, GEN_INT (cf
- ct
)));
4382 emit_insn (gen_addsi3 (out
, out
, GEN_INT (ct
)));
4386 emit_move_insn (out
, tmp
);
4388 return 1; /* DONE */
4395 tmp
= ct
, ct
= cf
, cf
= tmp
;
4397 compare_code
= reverse_condition (compare_code
);
4398 code
= reverse_condition (code
);
4400 if (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
4401 || diff
== 3 || diff
== 5 || diff
== 9)
4407 * lea cf(dest*(ct-cf)),dest
4411 * This also catches the degenerate setcc-only case.
4417 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
4418 ix86_compare_op1
, VOIDmode
, 0, 1);
4425 tmp
= gen_rtx_MULT (SImode
, out
, GEN_INT (diff
& ~1));
4429 tmp
= gen_rtx_PLUS (SImode
, tmp
, out
);
4435 tmp
= gen_rtx_PLUS (SImode
, tmp
, GEN_INT (cf
));
4441 emit_move_insn (out
, tmp
);
4446 clob
= gen_rtx_REG (CCmode
, FLAGS_REG
);
4447 clob
= gen_rtx_CLOBBER (VOIDmode
, clob
);
4449 tmp
= gen_rtx_SET (VOIDmode
, out
, tmp
);
4450 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
4454 emit_insn (gen_rtx_SET (VOIDmode
, out
, tmp
));
4456 if (out
!= operands
[0])
4457 emit_move_insn (operands
[0], out
);
4459 return 1; /* DONE */
4463 * General case: Jumpful:
4464 * xorl dest,dest cmpl op1, op2
4465 * cmpl op1, op2 movl ct, dest
4467 * decl dest movl cf, dest
4468 * andl (cf-ct),dest 1:
4473 * This is reasonably steep, but branch mispredict costs are
4474 * high on modern cpus, so consider failing only if optimizing
4477 * %%% Parameterize branch_cost on the tuning architecture, then
4478 * use that. The 80386 couldn't care less about mispredicts.
4481 if (!optimize_size
&& !TARGET_CMOVE
)
4487 compare_code
= reverse_condition (compare_code
);
4488 code
= reverse_condition (code
);
4491 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
4492 ix86_compare_op1
, VOIDmode
, 0, 1);
4494 emit_insn (gen_addsi3 (out
, out
, constm1_rtx
));
4495 emit_insn (gen_andsi3 (out
, out
, GEN_INT (cf
-ct
)));
4497 emit_insn (gen_addsi3 (out
, out
, GEN_INT (ct
)));
4498 if (out
!= operands
[0])
4499 emit_move_insn (operands
[0], out
);
4501 return 1; /* DONE */
4507 /* Try a few things more with specific constants and a variable. */
4510 rtx var
, orig_out
, out
, tmp
;
4513 return 0; /* FAIL */
4515 /* If one of the two operands is an interesting constant, load a
4516 constant with the above and mask it in with a logical operation. */
4518 if (GET_CODE (operands
[2]) == CONST_INT
)
4521 if (INTVAL (operands
[2]) == 0)
4522 operands
[3] = constm1_rtx
, op
= and_optab
;
4523 else if (INTVAL (operands
[2]) == -1)
4524 operands
[3] = const0_rtx
, op
= ior_optab
;
4526 else if (GET_CODE (operands
[3]) == CONST_INT
)
4529 if (INTVAL (operands
[3]) == 0)
4530 operands
[2] = constm1_rtx
, op
= and_optab
;
4531 else if (INTVAL (operands
[3]) == -1)
4532 operands
[2] = const0_rtx
, op
= ior_optab
;
4536 return 0; /* FAIL */
4538 orig_out
= operands
[0];
4539 tmp
= gen_reg_rtx (GET_MODE (orig_out
));
4542 /* Recurse to get the constant loaded. */
4543 if (ix86_expand_int_movcc (operands
) == 0)
4544 return 0; /* FAIL */
4546 /* Mask in the interesting variable. */
4547 out
= expand_binop (GET_MODE (orig_out
), op
, var
, tmp
, orig_out
, 0,
4549 if (out
!= orig_out
)
4550 emit_move_insn (orig_out
, out
);
4552 return 1; /* DONE */
4556 * For comparison with above,
4566 if (! nonimmediate_operand (operands
[2], GET_MODE (operands
[0])))
4567 operands
[2] = force_reg (GET_MODE (operands
[0]), operands
[2]);
4568 if (! nonimmediate_operand (operands
[3], GET_MODE (operands
[0])))
4569 operands
[3] = force_reg (GET_MODE (operands
[0]), operands
[3]);
4571 emit_insn (compare_seq
);
4572 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
4573 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
4574 compare_op
, operands
[2],
4577 return 1; /* DONE */
4581 ix86_expand_fp_movcc (operands
)
4585 enum machine_mode mode
;
4588 /* The floating point conditional move instructions don't directly
4589 support conditions resulting from a signed integer comparison. */
4591 code
= GET_CODE (operands
[1]);
4598 tmp
= gen_reg_rtx (QImode
);
4599 ix86_expand_setcc (code
, 0, tmp
);
4601 ix86_compare_op0
= tmp
;
4602 ix86_compare_op1
= const0_rtx
;
4609 mode
= SELECT_CC_MODE (code
, ix86_compare_op0
, ix86_compare_op1
);
4610 emit_insn (gen_rtx_SET (VOIDmode
, gen_rtx_REG (mode
, FLAGS_REG
),
4611 gen_rtx_COMPARE (mode
,
4613 ix86_compare_op1
)));
4614 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
4615 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
4616 gen_rtx_fmt_ee (code
, VOIDmode
,
4617 gen_rtx_REG (mode
, FLAGS_REG
),
4626 ix86_split_movdi (operands
)
4629 split_di (operands
+0, 1, operands
+2, operands
+3);
4630 split_di (operands
+1, 1, operands
+4, operands
+5);
4631 if (reg_overlap_mentioned_p (operands
[2], operands
[1]))
4634 if (!reg_overlap_mentioned_p (operands
[3], operands
[4]))
4636 tmp
= operands
[2], operands
[2] = operands
[3], operands
[3] = tmp
;
4637 tmp
= operands
[4], operands
[4] = operands
[5], operands
[5] = tmp
;
4641 emit_insn (gen_push (operands
[4]));
4642 emit_insn (gen_rtx_SET (VOIDmode
, operands
[3], operands
[5]));
4643 emit_insn (gen_popsi1 (operands
[2]));
4645 return 1; /* DONE */
4653 ix86_split_ashldi (operands
, scratch
)
4654 rtx
*operands
, scratch
;
4656 rtx low
[2], high
[2];
4659 if (GET_CODE (operands
[2]) == CONST_INT
)
4661 split_di (operands
, 2, low
, high
);
4662 count
= INTVAL (operands
[2]) & 63;
4666 emit_move_insn (high
[0], low
[1]);
4667 emit_move_insn (low
[0], const0_rtx
);
4670 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
4674 if (!rtx_equal_p (operands
[0], operands
[1]))
4675 emit_move_insn (operands
[0], operands
[1]);
4676 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
4677 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
4682 if (!rtx_equal_p (operands
[0], operands
[1]))
4683 emit_move_insn (operands
[0], operands
[1]);
4685 split_di (operands
, 1, low
, high
);
4687 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
4688 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
4690 if (TARGET_CMOVE
&& (! reload_completed
|| scratch
))
4692 if (! reload_completed
)
4693 scratch
= force_reg (SImode
, const0_rtx
);
4695 emit_move_insn (scratch
, const0_rtx
);
4697 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
4701 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
4706 ix86_split_ashrdi (operands
, scratch
)
4707 rtx
*operands
, scratch
;
4709 rtx low
[2], high
[2];
4712 if (GET_CODE (operands
[2]) == CONST_INT
)
4714 split_di (operands
, 2, low
, high
);
4715 count
= INTVAL (operands
[2]) & 63;
4719 emit_move_insn (low
[0], high
[1]);
4721 if (! reload_completed
)
4722 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
4725 emit_move_insn (high
[0], low
[0]);
4726 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
4730 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
4734 if (!rtx_equal_p (operands
[0], operands
[1]))
4735 emit_move_insn (operands
[0], operands
[1]);
4736 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
4737 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
4742 if (!rtx_equal_p (operands
[0], operands
[1]))
4743 emit_move_insn (operands
[0], operands
[1]);
4745 split_di (operands
, 1, low
, high
);
4747 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
4748 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
4750 if (TARGET_CMOVE
&& (!reload_completed
|| scratch
))
4752 if (! reload_completed
)
4753 scratch
= gen_reg_rtx (SImode
);
4754 emit_move_insn (scratch
, high
[0]);
4755 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
4756 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
4760 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
4765 ix86_split_lshrdi (operands
, scratch
)
4766 rtx
*operands
, scratch
;
4768 rtx low
[2], high
[2];
4771 if (GET_CODE (operands
[2]) == CONST_INT
)
4773 split_di (operands
, 2, low
, high
);
4774 count
= INTVAL (operands
[2]) & 63;
4778 emit_move_insn (low
[0], high
[1]);
4779 emit_move_insn (high
[0], const0_rtx
);
4782 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
4786 if (!rtx_equal_p (operands
[0], operands
[1]))
4787 emit_move_insn (operands
[0], operands
[1]);
4788 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
4789 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
4794 if (!rtx_equal_p (operands
[0], operands
[1]))
4795 emit_move_insn (operands
[0], operands
[1]);
4797 split_di (operands
, 1, low
, high
);
4799 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
4800 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
4802 /* Heh. By reversing the arguments, we can reuse this pattern. */
4803 if (TARGET_CMOVE
&& (! reload_completed
|| scratch
))
4805 if (! reload_completed
)
4806 scratch
= force_reg (SImode
, const0_rtx
);
4808 emit_move_insn (scratch
, const0_rtx
);
4810 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
4814 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
4818 /* Expand the appropriate insns for doing strlen if not just doing
4821 out = result, initialized with the start address
4822 align_rtx = alignment of the address.
4823 scratch = scratch register, initialized with the startaddress when
4824 not aligned, otherwise undefined
4826 This is just the body. It needs the initialisations mentioned above and
4827 some address computing at the end. These things are done in i386.md. */
4830 ix86_expand_strlensi_unroll_1 (out
, align_rtx
, scratch
)
4831 rtx out
, align_rtx
, scratch
;
4835 rtx align_2_label
= NULL_RTX
;
4836 rtx align_3_label
= NULL_RTX
;
4837 rtx align_4_label
= gen_label_rtx ();
4838 rtx end_0_label
= gen_label_rtx ();
4839 rtx end_2_label
= gen_label_rtx ();
4840 rtx end_3_label
= gen_label_rtx ();
4842 rtx flags
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
4845 if (GET_CODE (align_rtx
) == CONST_INT
)
4846 align
= INTVAL (align_rtx
);
4848 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
4850 /* Is there a known alignment and is it less than 4? */
4853 /* Is there a known alignment and is it not 2? */
4856 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
4857 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
4859 /* Leave just the 3 lower bits. */
4860 align_rtx
= expand_binop (SImode
, and_optab
, scratch
, GEN_INT (3),
4861 NULL_RTX
, 0, OPTAB_WIDEN
);
4863 emit_insn (gen_cmpsi_0 (align_rtx
, const0_rtx
));
4865 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
4866 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4867 gen_rtx_LABEL_REF (VOIDmode
,
4870 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4872 emit_insn (gen_cmpsi_1 (align_rtx
, GEN_INT (2)));
4874 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
4875 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4876 gen_rtx_LABEL_REF (VOIDmode
,
4879 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4881 tmp
= gen_rtx_GTU (VOIDmode
, flags
, const0_rtx
);
4882 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4883 gen_rtx_LABEL_REF (VOIDmode
,
4886 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4890 /* Since the alignment is 2, we have to check 2 or 0 bytes;
4891 check if is aligned to 4 - byte. */
4893 align_rtx
= expand_binop (SImode
, and_optab
, scratch
, GEN_INT (2),
4894 NULL_RTX
, 0, OPTAB_WIDEN
);
4896 emit_insn (gen_cmpsi_0 (align_rtx
, const0_rtx
));
4898 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
4899 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4900 gen_rtx_LABEL_REF (VOIDmode
,
4903 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4906 mem
= gen_rtx_MEM (QImode
, out
);
4908 /* Now compare the bytes. */
4910 /* Compare the first n unaligned byte on a byte per byte basis. */
4911 emit_insn (gen_cmpqi_0 (mem
, const0_rtx
));
4913 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
4914 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4915 gen_rtx_LABEL_REF (VOIDmode
, end_0_label
),
4917 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4919 /* Increment the address. */
4920 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
4922 /* Not needed with an alignment of 2 */
4925 emit_label (align_2_label
);
4927 emit_insn (gen_cmpqi_0 (mem
, const0_rtx
));
4929 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
4930 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4931 gen_rtx_LABEL_REF (VOIDmode
,
4934 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4936 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
4938 emit_label (align_3_label
);
4941 emit_insn (gen_cmpqi_0 (mem
, const0_rtx
));
4943 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
4944 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4945 gen_rtx_LABEL_REF (VOIDmode
, end_0_label
),
4947 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4949 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
4952 /* Generate loop to check 4 bytes at a time. It is not a good idea to
4953 align this loop. It gives only huge programs, but does not help to
4955 emit_label (align_4_label
);
4957 mem
= gen_rtx_MEM (SImode
, out
);
4958 emit_move_insn (scratch
, mem
);
4960 /* Check first byte. */
4961 emit_insn (gen_cmpqi_0 (gen_lowpart (QImode
, scratch
), const0_rtx
));
4962 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
4963 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4964 gen_rtx_LABEL_REF (VOIDmode
, end_0_label
),
4966 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4968 /* Check second byte. */
4969 emit_insn (gen_cmpqi_ext_3 (scratch
, const0_rtx
));
4970 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
4971 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4972 gen_rtx_LABEL_REF (VOIDmode
, end_3_label
),
4974 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4976 /* Check third byte. */
4977 emit_insn (gen_testsi_1 (scratch
, GEN_INT (0x00ff0000)));
4978 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
4979 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4980 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
4982 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4984 /* Check fourth byte and increment address. */
4985 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
4986 emit_insn (gen_testsi_1 (scratch
, GEN_INT (0xff000000)));
4987 tmp
= gen_rtx_NE (VOIDmode
, flags
, const0_rtx
);
4988 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4989 gen_rtx_LABEL_REF (VOIDmode
, align_4_label
),
4991 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4993 /* Now generate fixups when the compare stops within a 4-byte word. */
4994 emit_insn (gen_subsi3 (out
, out
, GEN_INT (3)));
4996 emit_label (end_2_label
);
4997 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
4999 emit_label (end_3_label
);
5000 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
5002 emit_label (end_0_label
);
5005 /* Clear stack slot assignments remembered from previous functions.
5006 This is called from INIT_EXPANDERS once before RTL is emitted for each
5010 ix86_init_machine_status (p
)
5013 enum machine_mode mode
;
5016 = (struct machine_function
*) xmalloc (sizeof (struct machine_function
));
5018 for (mode
= VOIDmode
; (int) mode
< (int) MAX_MACHINE_MODE
;
5019 mode
= (enum machine_mode
) ((int) mode
+ 1))
5020 for (n
= 0; n
< MAX_386_STACK_LOCALS
; n
++)
5021 ix86_stack_locals
[(int) mode
][n
] = NULL_RTX
;
5024 /* Mark machine specific bits of P for GC. */
5026 ix86_mark_machine_status (p
)
5029 enum machine_mode mode
;
5032 for (mode
= VOIDmode
; (int) mode
< (int) MAX_MACHINE_MODE
;
5033 mode
= (enum machine_mode
) ((int) mode
+ 1))
5034 for (n
= 0; n
< MAX_386_STACK_LOCALS
; n
++)
5035 ggc_mark_rtx (p
->machine
->stack_locals
[(int) mode
][n
]);
5038 /* Return a MEM corresponding to a stack slot with mode MODE.
5039 Allocate a new slot if necessary.
5041 The RTL for a function can have several slots available: N is
5042 which slot to use. */
5045 assign_386_stack_local (mode
, n
)
5046 enum machine_mode mode
;
5049 if (n
< 0 || n
>= MAX_386_STACK_LOCALS
)
5052 if (ix86_stack_locals
[(int) mode
][n
] == NULL_RTX
)
5053 ix86_stack_locals
[(int) mode
][n
]
5054 = assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
5056 return ix86_stack_locals
[(int) mode
][n
];
5059 /* Calculate the length of the memory address in the instruction
5060 encoding. Does not include the one-byte modrm, opcode, or prefix. */
5063 memory_address_length (addr
)
5066 struct ix86_address parts
;
5067 rtx base
, index
, disp
;
5070 if (GET_CODE (addr
) == PRE_DEC
5071 || GET_CODE (addr
) == POST_INC
)
5074 if (! ix86_decompose_address (addr
, &parts
))
5078 index
= parts
.index
;
5082 /* Register Indirect. */
5083 if (base
&& !index
&& !disp
)
5085 /* Special cases: ebp and esp need the two-byte modrm form. */
5086 if (addr
== stack_pointer_rtx
5087 || addr
== arg_pointer_rtx
5088 || addr
== frame_pointer_rtx
)
5092 /* Direct Addressing. */
5093 else if (disp
&& !base
&& !index
)
5098 /* Find the length of the displacement constant. */
5101 if (GET_CODE (disp
) == CONST_INT
5102 && CONST_OK_FOR_LETTER_P (INTVAL (disp
), 'K'))
5108 /* An index requires the two-byte modrm form. */
5117 ix86_attr_length_default (insn
)
5120 enum attr_type type
;
5123 type
= get_attr_type (insn
);
5124 extract_insn (insn
);
5152 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
5153 if (CONSTANT_P (recog_data
.operand
[i
]))
5155 if (GET_CODE (recog_data
.operand
[i
]) == CONST_INT
5156 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data
.operand
[i
]), 'K'))
5159 len
+= GET_MODE_SIZE (GET_MODE (recog_data
.operand
[0]));
5164 if (CONSTANT_P (recog_data
.operand
[1]))
5165 len
+= GET_MODE_SIZE (GET_MODE (recog_data
.operand
[0]));
5169 if (constant_call_address_operand (recog_data
.operand
[0],
5170 GET_MODE (recog_data
.operand
[0])))
5175 if (constant_call_address_operand (recog_data
.operand
[1],
5176 GET_MODE (recog_data
.operand
[1])))
5182 /* Irritatingly, single_set doesn't work with REG_UNUSED present,
5183 as we'll get from running life_analysis during reg-stack when
5185 rtx set
= PATTERN (insn
);
5186 if (GET_CODE (set
) == SET
)
5188 else if (GET_CODE (set
) == PARALLEL
5189 && XVECLEN (set
, 0) == 2
5190 && GET_CODE (XVECEXP (set
, 0, 0)) == SET
5191 && GET_CODE (XVECEXP (set
, 0, 1)) == CLOBBER
)
5192 set
= XVECEXP (set
, 0, 0);
5196 len
+= memory_address_length (SET_SRC (set
));
5205 if (STACK_TOP_P (recog_data
.operand
[0]))
5206 return 2 + (REGNO (recog_data
.operand
[1]) != FIRST_STACK_REG
+ 1);
5208 return 2 + (REGNO (recog_data
.operand
[0]) != FIRST_STACK_REG
+ 1);
5214 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
5215 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
5217 len
+= memory_address_length (XEXP (recog_data
.operand
[i
], 0));
5222 len
+= get_attr_length_opcode (insn
);
5223 len
+= get_attr_length_prefix (insn
);
5228 /* Return the maximum number of instructions a cpu can issue. */
5235 case PROCESSOR_PENTIUM
:
5239 case PROCESSOR_PENTIUMPRO
:
5247 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
5248 by DEP_INSN and nothing set by DEP_INSN. */
5251 ix86_flags_dependant (insn
, dep_insn
, insn_type
)
5253 enum attr_type insn_type
;
5257 /* Simplify the test for uninteresting insns. */
5258 if (insn_type
!= TYPE_SETCC
5259 && insn_type
!= TYPE_ICMOV
5260 && insn_type
!= TYPE_FCMOV
5261 && insn_type
!= TYPE_IBR
)
5264 if ((set
= single_set (dep_insn
)) != 0)
5266 set
= SET_DEST (set
);
5269 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
5270 && XVECLEN (PATTERN (dep_insn
), 0) == 2
5271 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
5272 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
5274 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
5275 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
5278 if (set
&& GET_CODE (set
) == REG
&& REGNO (set
) == FLAGS_REG
)
5280 /* This test is true if the dependant insn reads the flags but
5281 not any other potentially set register. */
5282 if (reg_overlap_mentioned_p (set
, PATTERN (insn
))
5283 && (!set2
|| !reg_overlap_mentioned_p (set2
, PATTERN (insn
))))
5290 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
5291 address with operands set by DEP_INSN. */
5294 ix86_agi_dependant (insn
, dep_insn
, insn_type
)
5296 enum attr_type insn_type
;
5300 if (insn_type
== TYPE_LEA
)
5301 addr
= SET_SRC (single_set (insn
));
5305 extract_insn (insn
);
5306 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
5307 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
5309 addr
= XEXP (recog_data
.operand
[i
], 0);
5316 return modified_in_p (addr
, dep_insn
);
5320 ix86_adjust_cost (insn
, link
, dep_insn
, cost
)
5321 rtx insn
, link
, dep_insn
;
5324 enum attr_type insn_type
, dep_insn_type
;
5326 int dep_insn_code_number
;
5328 /* We describe no anti or output depenancies. */
5329 if (REG_NOTE_KIND (link
) != 0)
5332 dep_insn_code_number
= recog_memoized (dep_insn
);
5334 /* If we can't recognize the insns, we can't really do anything. */
5335 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
5338 /* Prologue and epilogue allocators have false dependency on ebp.
5339 This results in one cycle extra stall on Pentium prologue scheduling, so
5340 handle this important case manually. */
5342 if ((dep_insn_code_number
== CODE_FOR_prologue_allocate_stack
5343 || dep_insn_code_number
== CODE_FOR_epilogue_deallocate_stack
)
5344 && !reg_mentioned_p (stack_pointer_rtx
, insn
))
5347 insn_type
= get_attr_type (insn
);
5348 dep_insn_type
= get_attr_type (dep_insn
);
5352 case PROCESSOR_PENTIUM
:
5353 /* Address Generation Interlock adds a cycle of latency. */
5354 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
5357 /* ??? Compares pair with jump/setcc. */
5358 if (ix86_flags_dependant (insn
, dep_insn
, insn_type
))
5361 /* Floating point stores require value to be ready one cycle ealier. */
5362 if (insn_type
== TYPE_FMOV
5363 && get_attr_memory (insn
) == MEMORY_STORE
5364 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
5368 case PROCESSOR_PENTIUMPRO
:
5369 /* Since we can't represent delayed latencies of load+operation,
5370 increase the cost here for non-imov insns. */
5371 if (dep_insn_type
!= TYPE_IMOV
5372 && dep_insn_type
!= TYPE_FMOV
5373 && get_attr_memory (dep_insn
) == MEMORY_LOAD
)
5376 /* INT->FP conversion is expensive. */
5377 if (get_attr_fp_int_src (dep_insn
))
5380 /* There is one cycle extra latency between an FP op and a store. */
5381 if (insn_type
== TYPE_FMOV
5382 && (set
= single_set (dep_insn
)) != NULL_RTX
5383 && (set2
= single_set (insn
)) != NULL_RTX
5384 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
5385 && GET_CODE (SET_DEST (set2
)) == MEM
)
5390 /* The esp dependency is resolved before the instruction is really
5392 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
5393 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
5396 /* Since we can't represent delayed latencies of load+operation,
5397 increase the cost here for non-imov insns. */
5398 if (get_attr_memory (dep_insn
) == MEMORY_LOAD
)
5399 cost
+= (dep_insn_type
!= TYPE_IMOV
) ? 2 : 1;
5401 /* INT->FP conversion is expensive. */
5402 if (get_attr_fp_int_src (dep_insn
))
5415 struct ppro_sched_data
5418 int issued_this_cycle
;
5423 ix86_safe_length (insn
)
5426 if (recog_memoized (insn
) >= 0)
5427 return get_attr_length(insn
);
5433 ix86_safe_length_prefix (insn
)
5436 if (recog_memoized (insn
) >= 0)
5437 return get_attr_length(insn
);
5442 static enum attr_memory
5443 ix86_safe_memory (insn
)
5446 if (recog_memoized (insn
) >= 0)
5447 return get_attr_memory(insn
);
5449 return MEMORY_UNKNOWN
;
5452 static enum attr_pent_pair
5453 ix86_safe_pent_pair (insn
)
5456 if (recog_memoized (insn
) >= 0)
5457 return get_attr_pent_pair(insn
);
5459 return PENT_PAIR_NP
;
5462 static enum attr_ppro_uops
5463 ix86_safe_ppro_uops (insn
)
5466 if (recog_memoized (insn
) >= 0)
5467 return get_attr_ppro_uops (insn
);
5469 return PPRO_UOPS_MANY
;
5473 ix86_dump_ppro_packet (dump
)
5476 if (ix86_sched_data
.ppro
.decode
[0])
5478 fprintf (dump
, "PPRO packet: %d",
5479 INSN_UID (ix86_sched_data
.ppro
.decode
[0]));
5480 if (ix86_sched_data
.ppro
.decode
[1])
5481 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[1]));
5482 if (ix86_sched_data
.ppro
.decode
[2])
5483 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[2]));
5488 /* We're beginning a new block. Initialize data structures as necessary. */
5491 ix86_sched_init (dump
, sched_verbose
)
5492 FILE *dump ATTRIBUTE_UNUSED
;
5493 int sched_verbose ATTRIBUTE_UNUSED
;
5495 memset (&ix86_sched_data
, 0, sizeof (ix86_sched_data
));
5498 /* Shift INSN to SLOT, and shift everything else down. */
5501 ix86_reorder_insn (insnp
, slot
)
5508 insnp
[0] = insnp
[1];
5509 while (++insnp
!= slot
);
5514 /* Find an instruction with given pairability and minimal amount of cycles
5515 lost by the fact that the CPU waits for both pipelines to finish before
5516 reading next instructions. Also take care that both instructions together
5517 can not exceed 7 bytes. */
5520 ix86_pent_find_pair (e_ready
, ready
, type
, first
)
5523 enum attr_pent_pair type
;
5526 int mincycles
, cycles
;
5527 enum attr_pent_pair tmp
;
5528 enum attr_memory memory
;
5529 rtx
*insnp
, *bestinsnp
= NULL
;
5531 if (ix86_safe_length (first
) > 7 + ix86_safe_length_prefix (first
))
5534 memory
= ix86_safe_memory (first
);
5535 cycles
= result_ready_cost (first
);
5536 mincycles
= INT_MAX
;
5538 for (insnp
= e_ready
; insnp
>= ready
&& mincycles
; --insnp
)
5539 if ((tmp
= ix86_safe_pent_pair (*insnp
)) == type
5540 && ix86_safe_length (*insnp
) <= 7 + ix86_safe_length_prefix (*insnp
))
5542 enum attr_memory second_memory
;
5543 int secondcycles
, currentcycles
;
5545 second_memory
= ix86_safe_memory (*insnp
);
5546 secondcycles
= result_ready_cost (*insnp
);
5547 currentcycles
= abs (cycles
- secondcycles
);
5549 if (secondcycles
>= 1 && cycles
>= 1)
5551 /* Two read/modify/write instructions together takes two
5553 if (memory
== MEMORY_BOTH
&& second_memory
== MEMORY_BOTH
)
5556 /* Read modify/write instruction followed by read/modify
5557 takes one cycle longer. */
5558 if (memory
== MEMORY_BOTH
&& second_memory
== MEMORY_LOAD
5559 && tmp
!= PENT_PAIR_UV
5560 && ix86_safe_pent_pair (first
) != PENT_PAIR_UV
)
5563 if (currentcycles
< mincycles
)
5564 bestinsnp
= insnp
, mincycles
= currentcycles
;
5570 /* We are about to being issuing insns for this clock cycle.
5571 Override the default sort algorithm to better slot instructions. */
5574 ix86_sched_reorder (dump
, sched_verbose
, ready
, n_ready
, clock_var
)
5575 FILE *dump ATTRIBUTE_UNUSED
;
5576 int sched_verbose ATTRIBUTE_UNUSED
;
5578 int n_ready
, clock_var
;
5580 rtx
*e_ready
= ready
+ n_ready
- 1;
5592 case PROCESSOR_PENTIUM
:
5593 /* This wouldn't be necessary if Haifa knew that static insn ordering
5594 is important to which pipe an insn is issued to. So we have to make
5595 some minor rearrangements. */
5597 enum attr_pent_pair pair1
, pair2
;
5599 pair1
= ix86_safe_pent_pair (*e_ready
);
5601 /* If the first insn is non-pairable, let it be. */
5602 if (pair1
== PENT_PAIR_NP
)
5604 pair2
= PENT_PAIR_NP
;
5606 /* If the first insn is UV or PV pairable, search for a PU
5608 if (pair1
== PENT_PAIR_UV
|| pair1
== PENT_PAIR_PV
)
5610 insnp
= ix86_pent_find_pair (e_ready
-1, ready
,
5611 PENT_PAIR_PU
, *e_ready
);
5613 pair2
= PENT_PAIR_PU
;
5616 /* If the first insn is PU or UV pairable, search for a PV
5618 if (pair2
== PENT_PAIR_NP
5619 && (pair1
== PENT_PAIR_PU
|| pair1
== PENT_PAIR_UV
))
5621 insnp
= ix86_pent_find_pair (e_ready
-1, ready
,
5622 PENT_PAIR_PV
, *e_ready
);
5624 pair2
= PENT_PAIR_PV
;
5627 /* If the first insn is pairable, search for a UV
5629 if (pair2
== PENT_PAIR_NP
)
5631 insnp
= ix86_pent_find_pair (e_ready
-1, ready
,
5632 PENT_PAIR_UV
, *e_ready
);
5634 pair2
= PENT_PAIR_UV
;
5637 if (pair2
== PENT_PAIR_NP
)
5640 /* Found something! Decide if we need to swap the order. */
5641 if (pair1
== PENT_PAIR_PV
|| pair2
== PENT_PAIR_PU
5642 || (pair1
== PENT_PAIR_UV
&& pair2
== PENT_PAIR_UV
5643 && ix86_safe_memory (*e_ready
) == MEMORY_BOTH
5644 && ix86_safe_memory (*insnp
) == MEMORY_LOAD
))
5645 ix86_reorder_insn (insnp
, e_ready
);
5647 ix86_reorder_insn (insnp
, e_ready
- 1);
5651 case PROCESSOR_PENTIUMPRO
:
5654 enum attr_ppro_uops cur_uops
;
5655 int issued_this_cycle
;
5657 /* At this point .ppro.decode contains the state of the three
5658 decoders from last "cycle". That is, those insns that were
5659 actually independant. But here we're scheduling for the
5660 decoder, and we may find things that are decodable in the
5663 memcpy (decode
, ix86_sched_data
.ppro
.decode
, sizeof(decode
));
5664 issued_this_cycle
= 0;
5667 cur_uops
= ix86_safe_ppro_uops (*insnp
);
5669 /* If the decoders are empty, and we've a complex insn at the
5670 head of the priority queue, let it issue without complaint. */
5671 if (decode
[0] == NULL
)
5673 if (cur_uops
== PPRO_UOPS_MANY
)
5679 /* Otherwise, search for a 2-4 uop unsn to issue. */
5680 while (cur_uops
!= PPRO_UOPS_FEW
)
5684 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
5687 /* If so, move it to the head of the line. */
5688 if (cur_uops
== PPRO_UOPS_FEW
)
5689 ix86_reorder_insn (insnp
, e_ready
);
5691 /* Issue the head of the queue. */
5692 issued_this_cycle
= 1;
5693 decode
[0] = *e_ready
--;
5696 /* Look for simple insns to fill in the other two slots. */
5697 for (i
= 1; i
< 3; ++i
)
5698 if (decode
[i
] == NULL
)
5700 if (ready
>= e_ready
)
5704 cur_uops
= ix86_safe_ppro_uops (*insnp
);
5705 while (cur_uops
!= PPRO_UOPS_ONE
)
5709 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
5712 /* Found one. Move it to the head of the queue and issue it. */
5713 if (cur_uops
== PPRO_UOPS_ONE
)
5715 ix86_reorder_insn (insnp
, e_ready
);
5716 decode
[i
] = *e_ready
--;
5717 issued_this_cycle
++;
5721 /* ??? Didn't find one. Ideally, here we would do a lazy split
5722 of 2-uop insns, issue one and queue the other. */
5726 if (issued_this_cycle
== 0)
5727 issued_this_cycle
= 1;
5728 ix86_sched_data
.ppro
.issued_this_cycle
= issued_this_cycle
;
5734 return ix86_issue_rate ();
5737 /* We are about to issue INSN. Return the number of insns left on the
5738 ready queue that can be issued this cycle. */
5741 ix86_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
)
5751 return can_issue_more
- 1;
5753 case PROCESSOR_PENTIUMPRO
:
5755 enum attr_ppro_uops uops
= ix86_safe_ppro_uops (insn
);
5757 if (uops
== PPRO_UOPS_MANY
)
5760 ix86_dump_ppro_packet (dump
);
5761 ix86_sched_data
.ppro
.decode
[0] = insn
;
5762 ix86_sched_data
.ppro
.decode
[1] = NULL
;
5763 ix86_sched_data
.ppro
.decode
[2] = NULL
;
5765 ix86_dump_ppro_packet (dump
);
5766 ix86_sched_data
.ppro
.decode
[0] = NULL
;
5768 else if (uops
== PPRO_UOPS_FEW
)
5771 ix86_dump_ppro_packet (dump
);
5772 ix86_sched_data
.ppro
.decode
[0] = insn
;
5773 ix86_sched_data
.ppro
.decode
[1] = NULL
;
5774 ix86_sched_data
.ppro
.decode
[2] = NULL
;
5778 for (i
= 0; i
< 3; ++i
)
5779 if (ix86_sched_data
.ppro
.decode
[i
] == NULL
)
5781 ix86_sched_data
.ppro
.decode
[i
] = insn
;
5789 ix86_dump_ppro_packet (dump
);
5790 ix86_sched_data
.ppro
.decode
[0] = NULL
;
5791 ix86_sched_data
.ppro
.decode
[1] = NULL
;
5792 ix86_sched_data
.ppro
.decode
[2] = NULL
;
5796 return --ix86_sched_data
.ppro
.issued_this_cycle
;