1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000
3 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-flags.h"
35 #include "insn-attr.h"
42 #include "basic-block.h"
45 #ifdef EXTRA_CONSTRAINT
46 /* If EXTRA_CONSTRAINT is defined, then the 'S'
47 constraint in REG_CLASS_FROM_LETTER will no longer work, and various
48 asm statements that need 'S' for class SIREG will break. */
49 error EXTRA_CONSTRAINT conflicts with S constraint letter
50 /* The previous line used to be #error, but some compilers barf
51 even if the conditional was untrue. */
54 #ifndef CHECK_STACK_LIMIT
/* Default stack-probe limit when the target configuration does not
   provide one.  Parenthesized so the negative value cannot be broken
   up by operator precedence at expansion sites.  */
#define CHECK_STACK_LIMIT (-1)
58 /* Processor costs (relative to an add) */
59 struct processor_costs i386_cost
= { /* 386 specific costs */
60 1, /* cost of an add instruction */
61 1, /* cost of a lea instruction */
62 3, /* variable shift costs */
63 2, /* constant shift costs */
64 6, /* cost of starting a multiply */
65 1, /* cost of multiply per each bit set */
66 23, /* cost of a divide/mod */
67 15, /* "large" insn */
69 4, /* cost for loading QImode using movzbl */
70 {2, 4, 2}, /* cost of loading integer registers
71 in QImode, HImode and SImode.
72 Relative to reg-reg move (2). */
73 {2, 4, 2}, /* cost of storing integer registers */
74 2, /* cost of reg,reg fld/fst */
75 {8, 8, 8}, /* cost of loading fp registers
76 in SFmode, DFmode and XFmode */
77 {8, 8, 8} /* cost of loading integer registers */
80 struct processor_costs i486_cost
= { /* 486 specific costs */
81 1, /* cost of an add instruction */
82 1, /* cost of a lea instruction */
83 3, /* variable shift costs */
84 2, /* constant shift costs */
85 12, /* cost of starting a multiply */
86 1, /* cost of multiply per each bit set */
87 40, /* cost of a divide/mod */
88 15, /* "large" insn */
90 4, /* cost for loading QImode using movzbl */
91 {2, 4, 2}, /* cost of loading integer registers
92 in QImode, HImode and SImode.
93 Relative to reg-reg move (2). */
94 {2, 4, 2}, /* cost of storing integer registers */
95 2, /* cost of reg,reg fld/fst */
96 {8, 8, 8}, /* cost of loading fp registers
97 in SFmode, DFmode and XFmode */
98 {8, 8, 8} /* cost of loading integer registers */
101 struct processor_costs pentium_cost
= {
102 1, /* cost of an add instruction */
103 1, /* cost of a lea instruction */
104 4, /* variable shift costs */
105 1, /* constant shift costs */
106 11, /* cost of starting a multiply */
107 0, /* cost of multiply per each bit set */
108 25, /* cost of a divide/mod */
109 8, /* "large" insn */
111 6, /* cost for loading QImode using movzbl */
112 {2, 4, 2}, /* cost of loading integer registers
113 in QImode, HImode and SImode.
114 Relative to reg-reg move (2). */
115 {2, 4, 2}, /* cost of storing integer registers */
116 2, /* cost of reg,reg fld/fst */
117 {2, 2, 6}, /* cost of loading fp registers
118 in SFmode, DFmode and XFmode */
119 {4, 4, 6} /* cost of loading integer registers */
122 struct processor_costs pentiumpro_cost
= {
123 1, /* cost of an add instruction */
124 1, /* cost of a lea instruction */
125 1, /* variable shift costs */
126 1, /* constant shift costs */
127 1, /* cost of starting a multiply */
128 0, /* cost of multiply per each bit set */
129 17, /* cost of a divide/mod */
130 8, /* "large" insn */
132 2, /* cost for loading QImode using movzbl */
133 {4, 4, 4}, /* cost of loading integer registers
134 in QImode, HImode and SImode.
135 Relative to reg-reg move (2). */
136 {2, 2, 2}, /* cost of storing integer registers */
137 2, /* cost of reg,reg fld/fst */
138 {2, 2, 6}, /* cost of loading fp registers
139 in SFmode, DFmode and XFmode */
140 {4, 4, 6} /* cost of loading integer registers */
143 struct processor_costs k6_cost
= {
144 1, /* cost of an add instruction */
145 2, /* cost of a lea instruction */
146 1, /* variable shift costs */
147 1, /* constant shift costs */
148 3, /* cost of starting a multiply */
149 0, /* cost of multiply per each bit set */
150 18, /* cost of a divide/mod */
151 8, /* "large" insn */
153 3, /* cost for loading QImode using movzbl */
154 {4, 5, 4}, /* cost of loading integer registers
155 in QImode, HImode and SImode.
156 Relative to reg-reg move (2). */
157 {2, 3, 2}, /* cost of storing integer registers */
158 4, /* cost of reg,reg fld/fst */
159 {6, 6, 6}, /* cost of loading fp registers
160 in SFmode, DFmode and XFmode */
161 {4, 4, 4} /* cost of loading integer registers */
164 struct processor_costs athlon_cost
= {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 1, /* variable shift costs */
168 1, /* constant shift costs */
169 5, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 19, /* cost of a divide/mod */
172 8, /* "large" insn */
174 4, /* cost for loading QImode using movzbl */
175 {4, 5, 4}, /* cost of loading integer registers
176 in QImode, HImode and SImode.
177 Relative to reg-reg move (2). */
178 {2, 3, 2}, /* cost of storing integer registers */
179 4, /* cost of reg,reg fld/fst */
180 {6, 6, 6}, /* cost of loading fp registers
181 in SFmode, DFmode and XFmode */
182 {4, 4, 4} /* cost of loading integer registers */
185 struct processor_costs
*ix86_cost
= &pentium_cost
;
187 /* Processor feature/optimization bitmasks. */
/* One bit per processor type; OR these together to build the CPU masks
   used by the x86_* tuning flags that follow.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
195 const int x86_use_leave
= m_386
| m_K6
| m_ATHLON
;
196 const int x86_push_memory
= m_386
| m_K6
| m_ATHLON
;
197 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
198 const int x86_movx
= m_ATHLON
/* m_386 | m_PPRO | m_K6 */;
199 const int x86_double_with_add
= ~m_386
;
200 const int x86_use_bit_test
= m_386
;
201 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON
| m_K6
;
202 const int x86_use_q_reg
= m_PENT
| m_PPRO
| m_K6
;
203 const int x86_use_any_reg
= m_486
;
204 const int x86_cmove
= m_PPRO
| m_ATHLON
;
205 const int x86_deep_branch
= m_PPRO
| m_K6
| m_ATHLON
;
206 const int x86_use_sahf
= m_PPRO
| m_K6
| m_ATHLON
;
207 const int x86_partial_reg_stall
= m_PPRO
;
208 const int x86_use_loop
= m_K6
;
209 const int x86_use_fiop
= ~(m_PPRO
| m_ATHLON
| m_PENT
);
210 const int x86_use_mov0
= m_K6
;
211 const int x86_use_cltd
= ~(m_PENT
| m_K6
);
212 const int x86_read_modify_write
= ~m_PENT
;
213 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
214 const int x86_split_long_moves
= m_PPRO
;
215 const int x86_promote_QImode
= m_K6
| m_PENT
| m_386
| m_486
;
216 const int x86_single_stringop
= m_386
;
/* Build a MEM rtx of the given MODE addressed by the hard frame
   pointer.  */
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
220 const char * const hi_reg_name
[] = HI_REGISTER_NAMES
;
221 const char * const qi_reg_name
[] = QI_REGISTER_NAMES
;
222 const char * const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
224 /* Array of the smallest class containing reg number REGNO, indexed by
225 REGNO. Used by REGNO_REG_CLASS in i386.h. */
227 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
230 AREG
, DREG
, CREG
, BREG
,
232 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
234 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
235 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
238 /* flags, fpsr, dirflag, frame */
239 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
242 /* The "default" register map. */
244 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
246 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
247 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
248 -1, -1, -1, -1, /* arg, flags, fpsr, dir */
251 /* Define the register numbers to be used in Dwarf debugging information.
252 The SVR4 reference port C compiler uses the following register numbers
253 in its Dwarf output code:
254 0 for %eax (gcc regno = 0)
255 1 for %ecx (gcc regno = 2)
256 2 for %edx (gcc regno = 1)
257 3 for %ebx (gcc regno = 3)
258 4 for %esp (gcc regno = 7)
259 5 for %ebp (gcc regno = 6)
260 6 for %esi (gcc regno = 4)
261 7 for %edi (gcc regno = 5)
262 The following three DWARF register numbers are never generated by
263 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
264 believes these numbers have these meanings.
265 8 for %eip (no gcc equivalent)
266 9 for %eflags (gcc regno = 17)
267 10 for %trapno (no gcc equivalent)
268 It is not at all clear how we should number the FP stack registers
269 for the x86 architecture. If the version of SDB on x86/svr4 were
270 a bit less brain dead with respect to floating-point then we would
271 have a precedent to follow with respect to DWARF register numbers
272 for x86 FP registers, but the SDB on x86/svr4 is so completely
273 broken with respect to FP registers that it is hardly worth thinking
274 of it as something to strive for compatibility with.
275 The version of x86/svr4 SDB I have at the moment does (partially)
276 seem to believe that DWARF register number 11 is associated with
277 the x86 register %st(0), but that's about all. Higher DWARF
278 register numbers don't seem to be associated with anything in
279 particular, and even for DWARF regno 11, SDB only seems to under-
280 stand that it should say that a variable lives in %st(0) (when
281 asked via an `=' command) if we said it was in DWARF regno 11,
282 but SDB still prints garbage when asked for the value of the
283 variable in question (via a `/' command).
284 (Also note that the labels SDB prints for various FP stack regs
285 when doing an `x' command are all wrong.)
286 Note that these problems generally don't affect the native SVR4
287 C compiler because it doesn't allow the use of -O with -g and
288 because when it is *not* optimizing, it allocates a memory
289 location for each floating-point variable, and the memory
290 location is what gets described in the DWARF AT_location
291 attribute for the variable in question.
292 Regardless of the severe mental illness of the x86/svr4 SDB, we
293 do something sensible here and we use the following DWARF
294 register numbers. Note that these are all stack-top-relative
296 11 for %st(0) (gcc regno = 8)
297 12 for %st(1) (gcc regno = 9)
298 13 for %st(2) (gcc regno = 10)
299 14 for %st(3) (gcc regno = 11)
300 15 for %st(4) (gcc regno = 12)
301 16 for %st(5) (gcc regno = 13)
302 17 for %st(6) (gcc regno = 14)
303 18 for %st(7) (gcc regno = 15)
305 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
307 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
308 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
309 -1, 9, -1, -1, /* arg, flags, fpsr, dir */
314 /* Test and compare insns in i386.md store the information needed to
315 generate branch and scc insns here. */
317 struct rtx_def
*ix86_compare_op0
= NULL_RTX
;
318 struct rtx_def
*ix86_compare_op1
= NULL_RTX
;
/* Number of 386-specific stack-local rtx slots tracked per machine
   mode (second dimension of machine_function's stack_locals array).  */
#define MAX_386_STACK_LOCALS 2
322 /* Define the structure for the machine field in struct function. */
323 struct machine_function
325 rtx stack_locals
[(int) MAX_MACHINE_MODE
][MAX_386_STACK_LOCALS
];
/* Shorthand for the current function's stack_locals array, stored in
   its struct machine_function.  */
#define ix86_stack_locals (cfun->machine->stack_locals)
330 /* which cpu are we scheduling for */
331 enum processor_type ix86_cpu
;
333 /* which instruction set architecture to use. */
336 /* Strings to hold which cpu and instruction set architecture to use. */
337 const char *ix86_cpu_string
; /* for -mcpu=<xxx> */
338 const char *ix86_arch_string
; /* for -march=<xxx> */
340 /* Register allocation order */
341 const char *ix86_reg_alloc_order
;
342 static char regs_allocated
[FIRST_PSEUDO_REGISTER
];
344 /* # of registers to use to pass arguments. */
345 const char *ix86_regparm_string
;
347 /* ix86_regparm_string as a number */
350 /* Alignment to use for loops and jumps: */
352 /* Power of two alignment for loops. */
353 const char *ix86_align_loops_string
;
355 /* Power of two alignment for non-loop jumps. */
356 const char *ix86_align_jumps_string
;
358 /* Power of two alignment for stack boundary in bytes. */
359 const char *ix86_preferred_stack_boundary_string
;
361 /* Preferred alignment for stack boundary in bits. */
362 int ix86_preferred_stack_boundary
;
364 /* Values 1-5: see jump.c */
365 int ix86_branch_cost
;
366 const char *ix86_branch_cost_string
;
368 /* Power of two alignment for functions. */
369 int ix86_align_funcs
;
370 const char *ix86_align_funcs_string
;
372 /* Power of two alignment for loops. */
373 int ix86_align_loops
;
375 /* Power of two alignment for non-loop jumps. */
376 int ix86_align_jumps
;
378 static void output_pic_addr_const
PARAMS ((FILE *, rtx
, int));
379 static void put_condition_code
PARAMS ((enum rtx_code
, enum machine_mode
,
381 static enum rtx_code unsigned_comparison
PARAMS ((enum rtx_code code
));
382 static rtx ix86_expand_int_compare
PARAMS ((enum rtx_code
, rtx
, rtx
));
383 static rtx ix86_expand_fp_compare
PARAMS ((enum rtx_code
, rtx
, rtx
, int));
384 static rtx ix86_expand_compare
PARAMS ((enum rtx_code
, int));
385 static rtx gen_push
PARAMS ((rtx
));
386 static int memory_address_length
PARAMS ((rtx addr
));
387 static int ix86_flags_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
388 static int ix86_agi_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
389 static int ix86_safe_length
PARAMS ((rtx
));
390 static enum attr_memory ix86_safe_memory
PARAMS ((rtx
));
391 static enum attr_pent_pair ix86_safe_pent_pair
PARAMS ((rtx
));
392 static enum attr_ppro_uops ix86_safe_ppro_uops
PARAMS ((rtx
));
393 static void ix86_dump_ppro_packet
PARAMS ((FILE *));
394 static void ix86_reorder_insn
PARAMS ((rtx
*, rtx
*));
395 static rtx
* ix86_pent_find_pair
PARAMS ((rtx
*, rtx
*, enum attr_pent_pair
,
397 static void ix86_init_machine_status
PARAMS ((struct function
*));
398 static void ix86_mark_machine_status
PARAMS ((struct function
*));
399 static void ix86_split_to_parts
PARAMS ((rtx
, rtx
*, enum machine_mode
));
400 static int ix86_safe_length_prefix
PARAMS ((rtx
));
401 static HOST_WIDE_INT ix86_compute_frame_size
PARAMS((HOST_WIDE_INT
,
402 int *, int *, int *));
403 static int ix86_nsaved_regs
PARAMS((void));
404 static void ix86_emit_save_regs
PARAMS((void));
405 static void ix86_emit_restore_regs_using_mov
PARAMS ((rtx
, int));
406 static void ix86_emit_epilogue_esp_adjustment
PARAMS((int));
407 static void ix86_sched_reorder_pentium
PARAMS((rtx
*, rtx
*));
408 static void ix86_sched_reorder_ppro
PARAMS((rtx
*, rtx
*));
412 rtx base
, index
, disp
;
416 static int ix86_decompose_address
PARAMS ((rtx
, struct ix86_address
*));
418 /* Sometimes certain combinations of command options do not make
419 sense on a particular target machine. You can define a macro
420 `OVERRIDE_OPTIONS' to take account of this. This macro, if
421 defined, is executed once just after all the command options have
424 Don't use this macro to turn on various extra optimizations for
425 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
430 /* Comes from final.c -- no real reason to change it. */
431 #define MAX_CODE_ALIGN 16
435 struct processor_costs
*cost
; /* Processor costs */
436 int target_enable
; /* Target flags to enable. */
437 int target_disable
; /* Target flags to disable. */
438 int align_loop
; /* Default alignments. */
443 const processor_target_table
[PROCESSOR_max
] =
445 {&i386_cost
, 0, 0, 2, 2, 2, 1},
446 {&i486_cost
, 0, 0, 4, 4, 4, 1},
447 {&pentium_cost
, 0, 0, -4, -4, -4, 1},
448 {&pentiumpro_cost
, 0, 0, 4, -4, 4, 1},
449 {&k6_cost
, 0, 0, -5, -5, 4, 1},
450 {&athlon_cost
, 0, 0, 4, -4, 4, 1}
455 const char *name
; /* processor name or nickname. */
456 enum processor_type processor
;
458 const processor_alias_table
[] =
460 {"i386", PROCESSOR_I386
},
461 {"i486", PROCESSOR_I486
},
462 {"i586", PROCESSOR_PENTIUM
},
463 {"pentium", PROCESSOR_PENTIUM
},
464 {"i686", PROCESSOR_PENTIUMPRO
},
465 {"pentiumpro", PROCESSOR_PENTIUMPRO
},
466 {"k6", PROCESSOR_K6
},
467 {"athlon", PROCESSOR_ATHLON
},
470 int const pta_size
= sizeof(processor_alias_table
)/sizeof(struct pta
);
472 #ifdef SUBTARGET_OVERRIDE_OPTIONS
473 SUBTARGET_OVERRIDE_OPTIONS
;
476 ix86_arch
= PROCESSOR_I386
;
477 ix86_cpu
= (enum processor_type
) TARGET_CPU_DEFAULT
;
479 if (ix86_arch_string
!= 0)
482 for (i
= 0; i
< pta_size
; i
++)
483 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
485 ix86_arch
= processor_alias_table
[i
].processor
;
486 /* Default cpu tuning to the architecture. */
487 ix86_cpu
= ix86_arch
;
491 error ("bad value (%s) for -march= switch", ix86_arch_string
);
494 if (ix86_cpu_string
!= 0)
497 for (i
= 0; i
< pta_size
; i
++)
498 if (! strcmp (ix86_cpu_string
, processor_alias_table
[i
].name
))
500 ix86_cpu
= processor_alias_table
[i
].processor
;
504 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string
);
507 ix86_cost
= processor_target_table
[ix86_cpu
].cost
;
508 target_flags
|= processor_target_table
[ix86_cpu
].target_enable
;
509 target_flags
&= ~processor_target_table
[ix86_cpu
].target_disable
;
511 /* Arrange to set up i386_stack_locals for all functions. */
512 init_machine_status
= ix86_init_machine_status
;
513 mark_machine_status
= ix86_mark_machine_status
;
515 /* Validate registers in register allocation order. */
516 if (ix86_reg_alloc_order
)
519 for (i
= 0; (ch
= ix86_reg_alloc_order
[i
]) != '\0'; i
++)
525 case 'a': regno
= 0; break;
526 case 'd': regno
= 1; break;
527 case 'c': regno
= 2; break;
528 case 'b': regno
= 3; break;
529 case 'S': regno
= 4; break;
530 case 'D': regno
= 5; break;
531 case 'B': regno
= 6; break;
533 default: fatal ("Register '%c' is unknown", ch
);
536 if (regs_allocated
[regno
])
537 fatal ("Register '%c' already specified in allocation order", ch
);
539 regs_allocated
[regno
] = 1;
543 /* Validate -mregparm= value. */
544 if (ix86_regparm_string
)
546 ix86_regparm
= atoi (ix86_regparm_string
);
547 if (ix86_regparm
< 0 || ix86_regparm
> REGPARM_MAX
)
548 fatal ("-mregparm=%d is not between 0 and %d",
549 ix86_regparm
, REGPARM_MAX
);
552 /* Validate -malign-loops= value, or provide default. */
553 ix86_align_loops
= processor_target_table
[ix86_cpu
].align_loop
;
554 if (ix86_align_loops_string
)
556 ix86_align_loops
= atoi (ix86_align_loops_string
);
557 if (ix86_align_loops
< 0 || ix86_align_loops
> MAX_CODE_ALIGN
)
558 fatal ("-malign-loops=%d is not between 0 and %d",
559 ix86_align_loops
, MAX_CODE_ALIGN
);
562 /* Validate -malign-jumps= value, or provide default. */
563 ix86_align_jumps
= processor_target_table
[ix86_cpu
].align_jump
;
564 if (ix86_align_jumps_string
)
566 ix86_align_jumps
= atoi (ix86_align_jumps_string
);
567 if (ix86_align_jumps
< 0 || ix86_align_jumps
> MAX_CODE_ALIGN
)
568 fatal ("-malign-jumps=%d is not between 0 and %d",
569 ix86_align_jumps
, MAX_CODE_ALIGN
);
572 /* Validate -malign-functions= value, or provide default. */
573 ix86_align_funcs
= processor_target_table
[ix86_cpu
].align_func
;
574 if (ix86_align_funcs_string
)
576 ix86_align_funcs
= atoi (ix86_align_funcs_string
);
577 if (ix86_align_funcs
< 0 || ix86_align_funcs
> MAX_CODE_ALIGN
)
578 fatal ("-malign-functions=%d is not between 0 and %d",
579 ix86_align_funcs
, MAX_CODE_ALIGN
);
582 /* Validate -mpreferred-stack-boundary= value, or provide default.
583 The default of 128 bits is for Pentium III's SSE __m128. */
584 ix86_preferred_stack_boundary
= 128;
585 if (ix86_preferred_stack_boundary_string
)
587 int i
= atoi (ix86_preferred_stack_boundary_string
);
589 fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i
);
590 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
593 /* Validate -mbranch-cost= value, or provide default. */
594 ix86_branch_cost
= processor_target_table
[ix86_cpu
].branch_cost
;
595 if (ix86_branch_cost_string
)
597 ix86_branch_cost
= atoi (ix86_branch_cost_string
);
598 if (ix86_branch_cost
< 0 || ix86_branch_cost
> 5)
599 fatal ("-mbranch-cost=%d is not between 0 and 5",
603 /* Keep nonleaf frame pointers. */
604 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
605 flag_omit_frame_pointer
= 1;
607 /* If we're doing fast math, we don't care about comparison order
608 wrt NaNs. This lets us use a shorter comparison sequence. */
610 target_flags
&= ~MASK_IEEE_FP
;
612 /* If we're planning on using `loop', use it. */
613 if (TARGET_USE_LOOP
&& optimize
)
614 flag_branch_on_count_reg
= 1;
617 /* A C statement (sans semicolon) to choose the order in which to
618 allocate hard registers for pseudo-registers local to a basic
621 Store the desired register order in the array `reg_alloc_order'.
622 Element 0 should be the register to allocate first; element 1, the
623 next register; and so on.
625 The macro body should not assume anything about the contents of
626 `reg_alloc_order' before execution of the macro.
628 On most machines, it is not necessary to define this macro. */
631 order_regs_for_local_alloc ()
635 /* User specified the register allocation order. */
637 if (ix86_reg_alloc_order
)
639 for (i
= order
= 0; (ch
= ix86_reg_alloc_order
[i
]) != '\0'; i
++)
645 case 'a': regno
= 0; break;
646 case 'd': regno
= 1; break;
647 case 'c': regno
= 2; break;
648 case 'b': regno
= 3; break;
649 case 'S': regno
= 4; break;
650 case 'D': regno
= 5; break;
651 case 'B': regno
= 6; break;
654 reg_alloc_order
[order
++] = regno
;
657 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
659 if (! regs_allocated
[i
])
660 reg_alloc_order
[order
++] = i
;
664 /* If user did not specify a register allocation order, use natural order. */
667 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
668 reg_alloc_order
[i
] = i
;
673 optimization_options (level
, size
)
675 int size ATTRIBUTE_UNUSED
;
677 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
678 make the problem with not enough registers even worse. */
679 #ifdef INSN_SCHEDULING
681 flag_schedule_insns
= 0;
685 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
686 attribute for DECL. The attributes in ATTRIBUTES have previously been
690 ix86_valid_decl_attribute_p (decl
, attributes
, identifier
, args
)
691 tree decl ATTRIBUTE_UNUSED
;
692 tree attributes ATTRIBUTE_UNUSED
;
693 tree identifier ATTRIBUTE_UNUSED
;
694 tree args ATTRIBUTE_UNUSED
;
699 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
700 attribute for TYPE. The attributes in ATTRIBUTES have previously been
704 ix86_valid_type_attribute_p (type
, attributes
, identifier
, args
)
706 tree attributes ATTRIBUTE_UNUSED
;
710 if (TREE_CODE (type
) != FUNCTION_TYPE
711 && TREE_CODE (type
) != METHOD_TYPE
712 && TREE_CODE (type
) != FIELD_DECL
713 && TREE_CODE (type
) != TYPE_DECL
)
716 /* Stdcall attribute says callee is responsible for popping arguments
717 if they are not variable. */
718 if (is_attribute_p ("stdcall", identifier
))
719 return (args
== NULL_TREE
);
721 /* Cdecl attribute says the callee is a normal C declaration. */
722 if (is_attribute_p ("cdecl", identifier
))
723 return (args
== NULL_TREE
);
725 /* Regparm attribute specifies how many integer arguments are to be
726 passed in registers. */
727 if (is_attribute_p ("regparm", identifier
))
731 if (! args
|| TREE_CODE (args
) != TREE_LIST
732 || TREE_CHAIN (args
) != NULL_TREE
733 || TREE_VALUE (args
) == NULL_TREE
)
736 cst
= TREE_VALUE (args
);
737 if (TREE_CODE (cst
) != INTEGER_CST
)
740 if (TREE_INT_CST_HIGH (cst
) != 0
741 || TREE_INT_CST_LOW (cst
) < 0
742 || TREE_INT_CST_LOW (cst
) > REGPARM_MAX
)
751 /* Return 0 if the attributes for two types are incompatible, 1 if they
752 are compatible, and 2 if they are nearly compatible (which causes a
753 warning to be generated). */
756 ix86_comp_type_attributes (type1
, type2
)
760 /* Check for mismatch of non-default calling convention. */
761 const char *rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
763 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
766 /* Check for mismatched return types (cdecl vs stdcall). */
767 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
768 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
773 /* Value is the number of bytes of arguments automatically
774 popped when returning from a subroutine call.
775 FUNDECL is the declaration node of the function (as a tree),
776 FUNTYPE is the data type of the function (as a tree),
777 or for a library call it is an identifier node for the subroutine name.
778 SIZE is the number of bytes of arguments passed on the stack.
780 On the 80386, the RTD insn may be used to pop them if the number
781 of args is fixed, but if the number is variable then the caller
782 must pop them all. RTD can't be used for library calls now
783 because the library is compiled with the Unix compiler.
784 Use of RTD is a selectable option, since it is incompatible with
785 standard Unix calling sequences. If the option is not selected,
786 the caller must always pop the args.
788 The attribute stdcall is equivalent to RTD on a per module basis. */
791 ix86_return_pops_args (fundecl
, funtype
, size
)
796 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
798 /* Cdecl functions override -mrtd, and never pop the stack. */
799 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
801 /* Stdcall functions will pop the stack if not variable args. */
802 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
)))
806 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
807 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
812 /* Lose any fake structure return argument. */
813 if (aggregate_value_p (TREE_TYPE (funtype
)))
814 return GET_MODE_SIZE (Pmode
);
819 /* Argument support functions. */
821 /* Initialize a variable CUM of type CUMULATIVE_ARGS
822 for a call to a function whose data type is FNTYPE.
823 For a library call, FNTYPE is 0. */
826 init_cumulative_args (cum
, fntype
, libname
)
827 CUMULATIVE_ARGS
*cum
; /* Argument info to initialize */
828 tree fntype
; /* tree ptr for function decl */
829 rtx libname
; /* SYMBOL_REF of library name or 0 */
831 static CUMULATIVE_ARGS zero_cum
;
832 tree param
, next_param
;
834 if (TARGET_DEBUG_ARG
)
836 fprintf (stderr
, "\ninit_cumulative_args (");
838 fprintf (stderr
, "fntype code = %s, ret code = %s",
839 tree_code_name
[(int) TREE_CODE (fntype
)],
840 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
842 fprintf (stderr
, "no fntype");
845 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
850 /* Set up the number of registers to use for passing arguments. */
851 cum
->nregs
= ix86_regparm
;
854 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype
));
857 cum
->nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
860 /* Determine if this function has variable arguments. This is
861 indicated by the last argument being 'void_type_mode' if there
862 are no variable arguments. If there are variable arguments, then
863 we won't pass anything in registers */
867 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
868 param
!= 0; param
= next_param
)
870 next_param
= TREE_CHAIN (param
);
871 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
876 if (TARGET_DEBUG_ARG
)
877 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
882 /* Update the data in CUM to advance over an argument
883 of mode MODE and data type TYPE.
884 (TYPE is null for libcalls where that information may not be available.) */
887 function_arg_advance (cum
, mode
, type
, named
)
888 CUMULATIVE_ARGS
*cum
; /* current arg information */
889 enum machine_mode mode
; /* current arg mode */
890 tree type
; /* type of the argument or 0 if lib support */
891 int named
; /* whether or not the argument was named */
894 = (mode
== BLKmode
) ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
);
895 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
897 if (TARGET_DEBUG_ARG
)
899 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
900 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
915 /* Define where to put the arguments to a function.
916 Value is zero to push the argument on the stack,
917 or a hard register in which to store the argument.
919 MODE is the argument's machine mode.
920 TYPE is the data type of the argument (as a tree).
921 This is null for libcalls where that information may
923 CUM is a variable of type CUMULATIVE_ARGS which gives info about
924 the preceding args and about the function being called.
925 NAMED is nonzero if this argument is a named parameter
926 (otherwise it is an extra parameter matching an ellipsis). */
929 function_arg (cum
, mode
, type
, named
)
930 CUMULATIVE_ARGS
*cum
; /* current arg information */
931 enum machine_mode mode
; /* current arg mode */
932 tree type
; /* type of the argument or 0 if lib support */
933 int named
; /* != 0 for normal args, == 0 for ... args */
937 = (mode
== BLKmode
) ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
);
938 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
942 /* For now, pass fp/complex values on the stack. */
951 if (words
<= cum
->nregs
)
952 ret
= gen_rtx_REG (mode
, cum
->regno
);
956 if (TARGET_DEBUG_ARG
)
959 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
960 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
963 fprintf (stderr
, ", reg=%%e%s", reg_names
[ REGNO(ret
) ]);
965 fprintf (stderr
, ", stack");
967 fprintf (stderr
, " )\n");
973 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
974 reference and a constant. */
977 symbolic_operand (op
, mode
)
979 enum machine_mode mode ATTRIBUTE_UNUSED
;
981 switch (GET_CODE (op
))
989 if (GET_CODE (op
) == SYMBOL_REF
990 || GET_CODE (op
) == LABEL_REF
991 || (GET_CODE (op
) == UNSPEC
993 && XINT (op
, 1) <= 7))
995 if (GET_CODE (op
) != PLUS
996 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
1000 if (GET_CODE (op
) == SYMBOL_REF
1001 || GET_CODE (op
) == LABEL_REF
)
1003 /* Only @GOTOFF gets offsets. */
1004 if (GET_CODE (op
) != UNSPEC
1005 || XINT (op
, 1) != 7)
1008 op
= XVECEXP (op
, 0, 0);
1009 if (GET_CODE (op
) == SYMBOL_REF
1010 || GET_CODE (op
) == LABEL_REF
)
1019 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
1022 pic_symbolic_operand (op
, mode
)
1024 enum machine_mode mode ATTRIBUTE_UNUSED
;
1026 if (GET_CODE (op
) == CONST
)
1029 if (GET_CODE (op
) == UNSPEC
)
1031 if (GET_CODE (op
) != PLUS
1032 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
1035 if (GET_CODE (op
) == UNSPEC
)
1041 /* Test for a valid operand for a call instruction. Don't allow the
1042 arg pointer register or virtual regs since they may decay into
1043 reg + const, which the patterns can't handle. */
1046 call_insn_operand (op
, mode
)
1048 enum machine_mode mode ATTRIBUTE_UNUSED
;
1050 if (GET_CODE (op
) != MEM
)
1054 /* Disallow indirect through a virtual register. This leads to
1055 compiler aborts when trying to eliminate them. */
1056 if (GET_CODE (op
) == REG
1057 && (op
== arg_pointer_rtx
1058 || op
== frame_pointer_rtx
1059 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
1060 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
1063 /* Disallow `call 1234'. Due to varying assembler lameness this
1064 gets either rejected or translated to `call .+1234'. */
1065 if (GET_CODE (op
) == CONST_INT
)
1068 /* Otherwise we can allow any general_operand in the address. */
1069 return general_operand (op
, Pmode
);
1072 /* Like call_insn_operand but allow (mem (symbol_ref ...)) even if pic. */
1075 expander_call_insn_operand (op
, mode
)
1077 enum machine_mode mode
;
1079 if (GET_CODE (op
) == MEM
1080 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
)
1083 return call_insn_operand (op
, mode
);
1087 constant_call_address_operand (op
, mode
)
1089 enum machine_mode mode ATTRIBUTE_UNUSED
;
1091 return GET_CODE (op
) == MEM
&&
1092 CONSTANT_ADDRESS_P (XEXP (op
, 0)) &&
1093 GET_CODE (XEXP (op
, 0)) != CONST_INT
;
1096 /* Match exactly zero and one. */
1099 const0_operand (op
, mode
)
1101 enum machine_mode mode
;
1103 return op
== CONST0_RTX (mode
);
1107 const1_operand (op
, mode
)
1109 enum machine_mode mode ATTRIBUTE_UNUSED
;
1111 return op
== const1_rtx
;
1114 /* Match 2, 4, or 8. Used for leal multiplicands. */
1117 const248_operand (op
, mode
)
1119 enum machine_mode mode ATTRIBUTE_UNUSED
;
1121 return (GET_CODE (op
) == CONST_INT
1122 && (INTVAL (op
) == 2 || INTVAL (op
) == 4 || INTVAL (op
) == 8));
1125 /* True if this is a constant appropriate for an increment or decremenmt. */
1128 incdec_operand (op
, mode
)
1130 enum machine_mode mode
;
1132 if (op
== const1_rtx
|| op
== constm1_rtx
)
1134 if (GET_CODE (op
) != CONST_INT
)
1136 if (mode
== SImode
&& INTVAL (op
) == (HOST_WIDE_INT
) 0xffffffff)
1138 if (mode
== HImode
&& INTVAL (op
) == (HOST_WIDE_INT
) 0xffff)
1140 if (mode
== QImode
&& INTVAL (op
) == (HOST_WIDE_INT
) 0xff)
1145 /* Return false if this is the stack pointer, or any other fake
1146 register eliminable to the stack pointer. Otherwise, this is
1149 This is used to prevent esp from being used as an index reg.
1150 Which would only happen in pathological cases. */
1153 reg_no_sp_operand (op
, mode
)
1155 enum machine_mode mode
;
1158 if (GET_CODE (t
) == SUBREG
)
1160 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
)
1163 return register_operand (op
, mode
);
1166 /* Return false if this is any eliminable register. Otherwise
1170 general_no_elim_operand (op
, mode
)
1172 enum machine_mode mode
;
1175 if (GET_CODE (t
) == SUBREG
)
1177 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
1178 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
1179 || t
== virtual_stack_dynamic_rtx
)
1182 return general_operand (op
, mode
);
1185 /* Return false if this is any eliminable register. Otherwise
1186 register_operand or const_int. */
1189 nonmemory_no_elim_operand (op
, mode
)
1191 enum machine_mode mode
;
1194 if (GET_CODE (t
) == SUBREG
)
1196 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
1197 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
1198 || t
== virtual_stack_dynamic_rtx
)
1201 return GET_CODE (op
) == CONST_INT
|| register_operand (op
, mode
);
1204 /* Return true if op is a Q_REGS class register. */
1207 q_regs_operand (op
, mode
)
1209 enum machine_mode mode
;
1211 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
1213 if (GET_CODE (op
) == SUBREG
)
1214 op
= SUBREG_REG (op
);
1215 return QI_REG_P (op
);
1218 /* Return true if op is a NON_Q_REGS class register. */
1221 non_q_regs_operand (op
, mode
)
1223 enum machine_mode mode
;
1225 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
1227 if (GET_CODE (op
) == SUBREG
)
1228 op
= SUBREG_REG (op
);
1229 return NON_QI_REG_P (op
);
1232 /* Return 1 if OP is a comparison operator that can use the condition code
1233 generated by a logical operation, which characteristicly does not set
1234 overflow or carry. To be used with CCNOmode. */
1237 no_comparison_operator (op
, mode
)
1239 enum machine_mode mode
;
1241 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
1242 && GET_RTX_CLASS (GET_CODE (op
)) == '<'
1243 && GET_CODE (op
) != LE
1244 && GET_CODE (op
) != GT
);
1247 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
1250 fcmov_comparison_operator (op
, mode
)
1252 enum machine_mode mode
;
1254 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
1255 && GET_RTX_CLASS (GET_CODE (op
)) == '<'
1256 && GET_CODE (op
) == unsigned_condition (GET_CODE (op
)));
1259 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1262 promotable_binary_operator (op
, mode
)
1264 enum machine_mode mode ATTRIBUTE_UNUSED
;
1266 switch (GET_CODE (op
))
1269 /* Modern CPUs have same latency for HImode and SImode multiply,
1270 but 386 and 486 do HImode multiply faster. */
1271 return ix86_cpu
> PROCESSOR_I486
;
1283 /* Nearly general operand, but accept any const_double, since we wish
1284 to be able to drop them into memory rather than have them get pulled
1288 cmp_fp_expander_operand (op
, mode
)
1290 enum machine_mode mode
;
1292 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
1294 if (GET_CODE (op
) == CONST_DOUBLE
)
1296 return general_operand (op
, mode
);
1299 /* Match an SI or HImode register for a zero_extract. */
1302 ext_register_operand (op
, mode
)
1304 enum machine_mode mode ATTRIBUTE_UNUSED
;
1306 if (GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
1308 return register_operand (op
, VOIDmode
);
1311 /* Return 1 if this is a valid binary floating-point operation.
1312 OP is the expression matched, and MODE is its mode. */
1315 binary_fp_operator (op
, mode
)
1317 enum machine_mode mode
;
1319 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
1322 switch (GET_CODE (op
))
1328 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
1336 mult_operator(op
, mode
)
1338 enum machine_mode mode ATTRIBUTE_UNUSED
;
1340 return GET_CODE (op
) == MULT
;
1344 div_operator(op
, mode
)
1346 enum machine_mode mode ATTRIBUTE_UNUSED
;
1348 return GET_CODE (op
) == DIV
;
1352 arith_or_logical_operator (op
, mode
)
1354 enum machine_mode mode
;
1356 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
1357 && (GET_RTX_CLASS (GET_CODE (op
)) == 'c'
1358 || GET_RTX_CLASS (GET_CODE (op
)) == '2'));
1361 /* Returns 1 if OP is memory operand with a displacement. */
1364 memory_displacement_operand (op
, mode
)
1366 enum machine_mode mode
;
1368 struct ix86_address parts
;
1370 if (! memory_operand (op
, mode
))
1373 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
1376 return parts
.disp
!= NULL_RTX
;
1379 /* To avoid problems when jump re-emits comparisons like testqi_ext_0,
1380 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1382 ??? It seems likely that this will only work because cmpsi is an
1383 expander, and no actual insns use this. */
1386 cmpsi_operand (op
, mode
)
1388 enum machine_mode mode
;
1390 if (general_operand (op
, mode
))
1393 if (GET_CODE (op
) == AND
1394 && GET_MODE (op
) == SImode
1395 && GET_CODE (XEXP (op
, 0)) == ZERO_EXTRACT
1396 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
1397 && GET_CODE (XEXP (XEXP (op
, 0), 2)) == CONST_INT
1398 && INTVAL (XEXP (XEXP (op
, 0), 1)) == 8
1399 && INTVAL (XEXP (XEXP (op
, 0), 2)) == 8
1400 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
1406 /* Returns 1 if OP is memory operand that can not be represented by the
1410 long_memory_operand (op
, mode
)
1412 enum machine_mode mode
;
1414 if (! memory_operand (op
, mode
))
1417 return memory_address_length (op
) != 0;
1420 /* Return nonzero if the rtx is known aligned. */
1423 aligned_operand (op
, mode
)
1425 enum machine_mode mode
;
1427 struct ix86_address parts
;
1429 if (!general_operand (op
, mode
))
1432 /* Registers and immediate operands are always "aligned". */
1433 if (GET_CODE (op
) != MEM
)
1436 /* Don't even try to do any aligned optimizations with volatiles. */
1437 if (MEM_VOLATILE_P (op
))
1442 /* Pushes and pops are only valid on the stack pointer. */
1443 if (GET_CODE (op
) == PRE_DEC
1444 || GET_CODE (op
) == POST_INC
)
1447 /* Decode the address. */
1448 if (! ix86_decompose_address (op
, &parts
))
1451 /* Look for some component that isn't known to be aligned. */
1455 && REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 4)
1460 if (REGNO_POINTER_ALIGN (REGNO (parts
.base
)) < 4)
1465 if (GET_CODE (parts
.disp
) != CONST_INT
1466 || (INTVAL (parts
.disp
) & 3) != 0)
1470 /* Didn't find one -- this must be an aligned address. */
1474 /* Return true if the constant is something that can be loaded with
1475 a special instruction. Only handle 0.0 and 1.0; others are less
1479 standard_80387_constant_p (x
)
1482 if (GET_CODE (x
) != CONST_DOUBLE
)
1485 #if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
1491 if (setjmp (handler
))
1494 set_float_handler (handler
);
1495 REAL_VALUE_FROM_CONST_DOUBLE (d
, x
);
1496 is0
= REAL_VALUES_EQUAL (d
, dconst0
) && !REAL_VALUE_MINUS_ZERO (d
);
1497 is1
= REAL_VALUES_EQUAL (d
, dconst1
);
1498 set_float_handler (NULL_PTR
);
1506 /* Note that on the 80387, other constants, such as pi,
1507 are much slower to load as standard constants
1508 than to load from doubles in memory! */
1509 /* ??? Not true on K6: all constants are equal cost. */
1516 /* Returns 1 if OP contains a symbol reference */
1519 symbolic_reference_mentioned_p (op
)
1522 register const char *fmt
;
1525 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
1528 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
1529 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
1535 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
1536 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
1540 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
1547 /* Return 1 if it is appropriate to emit `ret' instructions in the
1548 body of a function. Do this only if the epilogue is simple, needing a
1549 couple of insns. Prior to reloading, we can't tell how many registers
1550 must be saved, so return 0 then. Return 0 if there is no frame
1551 marker to de-allocate.
1553 If NON_SAVING_SETJMP is defined and true, then it is not possible
1554 for the epilogue to be simple, so return 0. This is a special case
1555 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1556 until final, but jump_optimize may need to know sooner if a
1560 ix86_can_use_return_insn_p ()
1562 HOST_WIDE_INT tsize
;
1565 #ifdef NON_SAVING_SETJMP
1566 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
1569 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
1570 if (profile_block_flag
== 2)
1574 if (! reload_completed
|| frame_pointer_needed
)
1577 /* Don't allow more than 32 pop, since that's all we can do
1578 with one instruction. */
1579 if (current_function_pops_args
1580 && current_function_args_size
>= 32768)
1583 tsize
= ix86_compute_frame_size (get_frame_size (), &nregs
, NULL
, NULL
);
1584 return tsize
== 0 && nregs
== 0;
/* Label emitted by asm_output_function_prefix for the deep-branch-
   prediction pic thunk, and a flag recording that it was output.  */
static char *pic_label_name;
static int pic_label_output;
/* GC-rooted copy of the "_GLOBAL_OFFSET_TABLE_" symbol name.  */
static char *global_offset_table_name;
1591 /* This function generates code for -fpic that loads %ebx with
1592 the return address of the caller and then returns. */
1595 asm_output_function_prefix (file
, name
)
1597 const char *name ATTRIBUTE_UNUSED
;
1600 int pic_reg_used
= flag_pic
&& (current_function_uses_pic_offset_table
1601 || current_function_uses_const_pool
);
1602 xops
[0] = pic_offset_table_rtx
;
1603 xops
[1] = stack_pointer_rtx
;
1605 /* Deep branch prediction favors having a return for every call. */
1606 if (pic_reg_used
&& TARGET_DEEP_BRANCH_PREDICTION
)
1608 if (!pic_label_output
)
1610 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
1611 internal (non-global) label that's being emitted, it didn't make
1612 sense to have .type information for local labels. This caused
1613 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
1614 me debug info for a label that you're declaring non-global?) this
1615 was changed to call ASM_OUTPUT_LABEL() instead. */
1617 ASM_OUTPUT_LABEL (file
, pic_label_name
);
1619 xops
[1] = gen_rtx_MEM (SImode
, xops
[1]);
1620 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
1621 output_asm_insn ("ret", xops
);
1623 pic_label_output
= 1;
1629 load_pic_register ()
1633 if (global_offset_table_name
== NULL
)
1635 global_offset_table_name
=
1636 ggc_alloc_string ("_GLOBAL_OFFSET_TABLE_", 21);
1637 ggc_add_string_root (&global_offset_table_name
, 1);
1639 gotsym
= gen_rtx_SYMBOL_REF (Pmode
, global_offset_table_name
);
1641 if (TARGET_DEEP_BRANCH_PREDICTION
)
1643 if (pic_label_name
== NULL
)
1645 pic_label_name
= ggc_alloc_string (NULL
, 32);
1646 ggc_add_string_root (&pic_label_name
, 1);
1647 ASM_GENERATE_INTERNAL_LABEL (pic_label_name
, "LPR", 0);
1649 pclab
= gen_rtx_MEM (QImode
, gen_rtx_SYMBOL_REF (Pmode
, pic_label_name
));
1653 pclab
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
1656 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx
, pclab
));
1658 if (! TARGET_DEEP_BRANCH_PREDICTION
)
1659 emit_insn (gen_popsi1 (pic_offset_table_rtx
));
1661 emit_insn (gen_prologue_set_got (pic_offset_table_rtx
, gotsym
, pclab
));
1664 /* Generate an SImode "push" pattern for input ARG. */
1670 return gen_rtx_SET (VOIDmode
,
1671 gen_rtx_MEM (SImode
,
1672 gen_rtx_PRE_DEC (SImode
,
1673 stack_pointer_rtx
)),
1677 /* Return number of registers to be saved on the stack. */
1683 int pic_reg_used
= flag_pic
&& (current_function_uses_pic_offset_table
1684 || current_function_uses_const_pool
);
1685 int limit
= (frame_pointer_needed
1686 ? HARD_FRAME_POINTER_REGNUM
: STACK_POINTER_REGNUM
);
1689 for (regno
= limit
- 1; regno
>= 0; regno
--)
1690 if ((regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1691 || (regno
== PIC_OFFSET_TABLE_REGNUM
&& pic_reg_used
))
1698 /* Return the offset between two registers, one to be eliminated, and the other
1699 its replacement, at the start of a routine. */
1702 ix86_initial_elimination_offset (from
, to
)
1709 /* Stack grows downward:
1715 saved frame pointer if frame_pointer_needed
1716 <- HARD_FRAME_POINTER
1726 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
1727 /* Skip saved PC and previous frame pointer.
1728 Executed only when frame_pointer_needed. */
1730 else if (from
== FRAME_POINTER_REGNUM
1731 && to
== HARD_FRAME_POINTER_REGNUM
)
1733 ix86_compute_frame_size (get_frame_size (), &nregs
, &padding1
, (int *)0);
1734 padding1
+= nregs
* UNITS_PER_WORD
;
1739 /* ARG_POINTER or FRAME_POINTER to STACK_POINTER elimination. */
1740 int frame_size
= frame_pointer_needed
? 8 : 4;
1741 HOST_WIDE_INT tsize
= ix86_compute_frame_size (get_frame_size (),
1742 &nregs
, &padding1
, (int *)0);
1745 if (to
!= STACK_POINTER_REGNUM
)
1747 else if (from
== ARG_POINTER_REGNUM
)
1748 return tsize
+ nregs
* UNITS_PER_WORD
+ frame_size
;
1749 else if (from
!= FRAME_POINTER_REGNUM
)
1752 return tsize
- padding1
;
1756 /* Compute the size of local storage taking into consideration the
1757 desired stack alignment which is to be maintained. Also determine
1758 the number of registers saved below the local storage.
1760 PADDING1 returns padding before stack frame and PADDING2 returns
1761 padding after stack frame;
1764 static HOST_WIDE_INT
1765 ix86_compute_frame_size (size
, nregs_on_stack
, rpadding1
, rpadding2
)
1767 int *nregs_on_stack
;
1774 HOST_WIDE_INT total_size
;
1775 int stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
1777 int preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
1779 nregs
= ix86_nsaved_regs ();
1782 offset
= frame_pointer_needed
? 8 : 4;
1784 /* Do some sanity checking of stack_alignment_needed and preferred_alignment,
1785 since i386 port is the only using those features that may break easilly. */
1787 if (size
&& !stack_alignment_needed
)
1789 if (!size
&& stack_alignment_needed
)
1791 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
1793 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
1795 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
1798 if (stack_alignment_needed
< 4)
1799 stack_alignment_needed
= 4;
1801 offset
+= nregs
* UNITS_PER_WORD
;
1803 total_size
+= offset
;
1805 /* Align start of frame for local function. */
1806 padding1
= ((offset
+ stack_alignment_needed
- 1)
1807 & -stack_alignment_needed
) - offset
;
1808 total_size
+= padding1
;
1810 /* Align stack boundary. */
1811 padding2
= ((total_size
+ preferred_alignment
- 1)
1812 & -preferred_alignment
) - total_size
;
1815 *nregs_on_stack
= nregs
;
1817 *rpadding1
= padding1
;
1819 *rpadding2
= padding2
;
1821 return size
+ padding1
+ padding2
;
1824 /* Emit code to save registers in the prologue. */
1827 ix86_emit_save_regs ()
1832 int pic_reg_used
= flag_pic
&& (current_function_uses_pic_offset_table
1833 || current_function_uses_const_pool
);
1834 limit
= (frame_pointer_needed
1835 ? HARD_FRAME_POINTER_REGNUM
: STACK_POINTER_REGNUM
);
1837 for (regno
= limit
- 1; regno
>= 0; regno
--)
1838 if ((regs_ever_live
[regno
] && !call_used_regs
[regno
])
1839 || (regno
== PIC_OFFSET_TABLE_REGNUM
&& pic_reg_used
))
1841 insn
= emit_insn (gen_push (gen_rtx_REG (SImode
, regno
)));
1842 RTX_FRAME_RELATED_P (insn
) = 1;
1846 /* Expand the prologue into a bunch of separate insns. */
1849 ix86_expand_prologue ()
1851 HOST_WIDE_INT tsize
= ix86_compute_frame_size (get_frame_size (), (int *)0, (int *)0,
1854 int pic_reg_used
= flag_pic
&& (current_function_uses_pic_offset_table
1855 || current_function_uses_const_pool
);
1857 /* Note: AT&T enter does NOT have reversed args. Enter is probably
1858 slower on all targets. Also sdb doesn't like it. */
1860 if (frame_pointer_needed
)
1862 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
1863 RTX_FRAME_RELATED_P (insn
) = 1;
1865 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
1866 RTX_FRAME_RELATED_P (insn
) = 1;
1869 ix86_emit_save_regs ();
1873 else if (! TARGET_STACK_PROBE
|| tsize
< CHECK_STACK_LIMIT
)
1875 if (frame_pointer_needed
)
1876 insn
= emit_insn (gen_pro_epilogue_adjust_stack
1877 (stack_pointer_rtx
, stack_pointer_rtx
,
1878 GEN_INT (-tsize
), hard_frame_pointer_rtx
));
1880 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
1882 RTX_FRAME_RELATED_P (insn
) = 1;
1886 /* ??? Is this only valid for Win32? */
1890 arg0
= gen_rtx_REG (SImode
, 0);
1891 emit_move_insn (arg0
, GEN_INT (tsize
));
1893 sym
= gen_rtx_MEM (FUNCTION_MODE
,
1894 gen_rtx_SYMBOL_REF (Pmode
, "_alloca"));
1895 insn
= emit_call_insn (gen_call (sym
, const0_rtx
));
1897 CALL_INSN_FUNCTION_USAGE (insn
)
1898 = gen_rtx_EXPR_LIST (VOIDmode
, gen_rtx_USE (VOIDmode
, arg0
),
1899 CALL_INSN_FUNCTION_USAGE (insn
));
1902 #ifdef SUBTARGET_PROLOGUE
1907 load_pic_register ();
1909 /* If we are profiling, make sure no instructions are scheduled before
1910 the call to mcount. However, if -fpic, the above call will have
1912 if ((profile_flag
|| profile_block_flag
) && ! pic_reg_used
)
1913 emit_insn (gen_blockage ());
1916 /* Emit code to add TSIZE to esp value. Use POP instruction when
1920 ix86_emit_epilogue_esp_adjustment (tsize
)
1923 /* Intel's docs say that for 4 or 8 bytes of stack frame one should
1924 use `pop' and not `add'. */
1925 int use_pop
= tsize
== 4;
1928 /* Use two pops only for the Pentium processors. */
1929 if (tsize
== 8 && !TARGET_386
&& !TARGET_486
)
1931 rtx retval
= current_function_return_rtx
;
1933 edx
= gen_rtx_REG (SImode
, 1);
1935 /* This case is a bit more complex. Since we cannot pop into
1936 %ecx twice we need a second register. But this is only
1937 available if the return value is not of DImode in which
1938 case the %edx register is not available. */
1939 use_pop
= (retval
== NULL
1940 || !reg_overlap_mentioned_p (edx
, retval
));
1945 ecx
= gen_rtx_REG (SImode
, 2);
1947 /* We have to prevent the two pops here from being scheduled.
1948 GCC otherwise would try in some situation to put other
1949 instructions in between them which has a bad effect. */
1950 emit_insn (gen_blockage ());
1951 emit_insn (gen_popsi1 (ecx
));
1953 emit_insn (gen_popsi1 (edx
));
1957 /* If a frame pointer is present, we must be sure to tie the sp
1958 to the fp so that we don't mis-schedule. */
1959 if (frame_pointer_needed
)
1960 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
1963 hard_frame_pointer_rtx
));
1965 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
1970 /* Emit code to restore saved registers using MOV insns. First register
1971 is restored from POINTER + OFFSET. */
1973 ix86_emit_restore_regs_using_mov (pointer
, offset
)
1978 int pic_reg_used
= flag_pic
&& (current_function_uses_pic_offset_table
1979 || current_function_uses_const_pool
);
1980 int limit
= (frame_pointer_needed
1981 ? HARD_FRAME_POINTER_REGNUM
: STACK_POINTER_REGNUM
);
1983 for (regno
= 0; regno
< limit
; regno
++)
1984 if ((regs_ever_live
[regno
] && !call_used_regs
[regno
])
1985 || (regno
== PIC_OFFSET_TABLE_REGNUM
&& pic_reg_used
))
1987 emit_move_insn (gen_rtx_REG (SImode
, regno
),
1988 adj_offsettable_operand (gen_rtx_MEM (SImode
,
1995 /* Restore function stack, frame, and registers. */
1998 ix86_expand_epilogue ()
2003 int pic_reg_used
= flag_pic
&& (current_function_uses_pic_offset_table
2004 || current_function_uses_const_pool
);
2005 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
2006 HOST_WIDE_INT offset
;
2007 HOST_WIDE_INT tsize
= ix86_compute_frame_size (get_frame_size (), &nregs
,
2008 (int *)0, (int *)0);
2011 /* Calculate start of saved registers relative to ebp. */
2012 offset
= -nregs
* UNITS_PER_WORD
;
2014 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
2015 if (profile_block_flag
== 2)
2017 FUNCTION_BLOCK_PROFILER_EXIT
;
2021 /* If we're only restoring one register and sp is not valid then
2022 using a move instruction to restore the register since it's
2023 less work than reloading sp and popping the register.
2025 The default code result in stack adjustment using add/lea instruction,
2026 while this code results in LEAVE instruction (or discrete equivalent),
2027 so it is profitable in some other cases as well. Especially when there
2028 are no registers to restore. We also use this code when TARGET_USE_LEAVE
2029 and there is exactly one register to pop. This heruistic may need some
2030 tuning in future. */
2031 if ((!sp_valid
&& nregs
<= 1)
2032 || (frame_pointer_needed
&& !nregs
&& tsize
)
2033 || (frame_pointer_needed
&& TARGET_USE_LEAVE
&& !optimize_size
2036 /* Restore registers. We can use ebp or esp to address the memory
2037 locations. If both are available, default to ebp, since offsets
2038 are known to be small. Only exception is esp pointing directly to the
2039 end of block of saved registers, where we may simplify addressing
2042 if (!frame_pointer_needed
|| (sp_valid
&& !tsize
))
2043 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
, tsize
);
2045 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
, offset
);
2047 if (!frame_pointer_needed
)
2048 ix86_emit_epilogue_esp_adjustment (tsize
+ nregs
* UNITS_PER_WORD
);
2049 /* If not an i386, mov & pop is faster than "leave". */
2050 else if (TARGET_USE_LEAVE
|| optimize_size
)
2051 emit_insn (gen_leave ());
2054 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
2055 hard_frame_pointer_rtx
,
2057 hard_frame_pointer_rtx
));
2058 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
2063 /* First step is to deallocate the stack frame so that we can
2064 pop the registers. */
2067 if (!frame_pointer_needed
)
2069 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
2070 hard_frame_pointer_rtx
,
2072 hard_frame_pointer_rtx
));
2075 ix86_emit_epilogue_esp_adjustment (tsize
);
2077 for (regno
= 0; regno
< STACK_POINTER_REGNUM
; regno
++)
2078 if ((regs_ever_live
[regno
] && !call_used_regs
[regno
])
2079 || (regno
== PIC_OFFSET_TABLE_REGNUM
&& pic_reg_used
))
2080 emit_insn (gen_popsi1 (gen_rtx_REG (SImode
, regno
)));
2083 if (current_function_pops_args
&& current_function_args_size
)
2085 rtx popc
= GEN_INT (current_function_pops_args
);
2087 /* i386 can only pop 32K bytes (maybe 64K? Is it signed?). If
2088 asked to pop more, pop return address, do explicit add, and jump
2089 indirectly to the caller. */
2091 if (current_function_pops_args
>= 32768)
2093 rtx ecx
= gen_rtx_REG (SImode
, 2);
2095 emit_insn (gen_popsi1 (ecx
));
2096 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
2097 emit_indirect_jump (ecx
);
2100 emit_jump_insn (gen_return_pop_internal (popc
));
2103 emit_jump_insn (gen_return_internal ());
2106 /* Extract the parts of an RTL expression that is a valid memory address
2107 for an instruction. Return false if the structure of the address is
2111 ix86_decompose_address (addr
, out
)
2113 struct ix86_address
*out
;
2115 rtx base
= NULL_RTX
;
2116 rtx index
= NULL_RTX
;
2117 rtx disp
= NULL_RTX
;
2118 HOST_WIDE_INT scale
= 1;
2119 rtx scale_rtx
= NULL_RTX
;
2121 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == SUBREG
)
2123 else if (GET_CODE (addr
) == PLUS
)
2125 rtx op0
= XEXP (addr
, 0);
2126 rtx op1
= XEXP (addr
, 1);
2127 enum rtx_code code0
= GET_CODE (op0
);
2128 enum rtx_code code1
= GET_CODE (op1
);
2130 if (code0
== REG
|| code0
== SUBREG
)
2132 if (code1
== REG
|| code1
== SUBREG
)
2133 index
= op0
, base
= op1
; /* index + base */
2135 base
= op0
, disp
= op1
; /* base + displacement */
2137 else if (code0
== MULT
)
2139 index
= XEXP (op0
, 0);
2140 scale_rtx
= XEXP (op0
, 1);
2141 if (code1
== REG
|| code1
== SUBREG
)
2142 base
= op1
; /* index*scale + base */
2144 disp
= op1
; /* index*scale + disp */
2146 else if (code0
== PLUS
&& GET_CODE (XEXP (op0
, 0)) == MULT
)
2148 index
= XEXP (XEXP (op0
, 0), 0); /* index*scale + base + disp */
2149 scale_rtx
= XEXP (XEXP (op0
, 0), 1);
2150 base
= XEXP (op0
, 1);
2153 else if (code0
== PLUS
)
2155 index
= XEXP (op0
, 0); /* index + base + disp */
2156 base
= XEXP (op0
, 1);
2162 else if (GET_CODE (addr
) == MULT
)
2164 index
= XEXP (addr
, 0); /* index*scale */
2165 scale_rtx
= XEXP (addr
, 1);
2167 else if (GET_CODE (addr
) == ASHIFT
)
2171 /* We're called for lea too, which implements ashift on occasion. */
2172 index
= XEXP (addr
, 0);
2173 tmp
= XEXP (addr
, 1);
2174 if (GET_CODE (tmp
) != CONST_INT
)
2176 scale
= INTVAL (tmp
);
2177 if ((unsigned HOST_WIDE_INT
) scale
> 3)
2182 disp
= addr
; /* displacement */
2184 /* Extract the integral value of scale. */
2187 if (GET_CODE (scale_rtx
) != CONST_INT
)
2189 scale
= INTVAL (scale_rtx
);
2192 /* Allow arg pointer and stack pointer as index if there is not scaling */
2193 if (base
&& index
&& scale
== 1
2194 && (index
== arg_pointer_rtx
|| index
== frame_pointer_rtx
2195 || index
== stack_pointer_rtx
))
2202 /* Special case: %ebp cannot be encoded as a base without a displacement. */
2203 if ((base
== hard_frame_pointer_rtx
2204 || base
== frame_pointer_rtx
2205 || base
== arg_pointer_rtx
) && !disp
)
2208 /* Special case: on K6, [%esi] makes the instruction vector decoded.
2209 Avoid this by transforming to [%esi+0]. */
2210 if (ix86_cpu
== PROCESSOR_K6
&& !optimize_size
2211 && base
&& !index
&& !disp
2213 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
2216 /* Special case: encode reg+reg instead of reg*2. */
2217 if (!base
&& index
&& scale
&& scale
== 2)
2218 base
= index
, scale
= 1;
2220 /* Special case: scaling cannot be encoded without base or displacement. */
2221 if (!base
&& !disp
&& index
&& scale
!= 1)
2232 /* Determine if a given CONST RTX is a valid memory displacement
2236 legitimate_pic_address_disp_p (disp
)
2239 if (GET_CODE (disp
) != CONST
)
2241 disp
= XEXP (disp
, 0);
2243 if (GET_CODE (disp
) == PLUS
)
2245 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
2247 disp
= XEXP (disp
, 0);
2250 if (GET_CODE (disp
) != UNSPEC
2251 || XVECLEN (disp
, 0) != 1)
2254 /* Must be @GOT or @GOTOFF. */
2255 if (XINT (disp
, 1) != 6
2256 && XINT (disp
, 1) != 7)
2259 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
2260 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
2266 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2267 memory address for an instruction. The MODE argument is the machine mode
2268 for the MEM expression that wants to use this address.
2270 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
2271 convert common non-canonical forms to canonical form so that they will
2275 legitimate_address_p (mode
, addr
, strict
)
2276 enum machine_mode mode
;
2280 struct ix86_address parts
;
2281 rtx base
, index
, disp
;
2282 HOST_WIDE_INT scale
;
2283 const char *reason
= NULL
;
2284 rtx reason_rtx
= NULL_RTX
;
2286 if (TARGET_DEBUG_ADDR
)
2289 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
2290 GET_MODE_NAME (mode
), strict
);
2294 if (! ix86_decompose_address (addr
, &parts
))
2296 reason
= "decomposition failed";
2301 index
= parts
.index
;
2303 scale
= parts
.scale
;
2305 /* Validate base register.
2307 Don't allow SUBREG's here, it can lead to spill failures when the base
2308 is one word out of a two word structure, which is represented internally
2315 if (GET_CODE (base
) != REG
)
2317 reason
= "base is not a register";
2321 if (GET_MODE (base
) != Pmode
)
2323 reason
= "base is not in Pmode";
2327 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (base
))
2328 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (base
)))
2330 reason
= "base is not valid";
2335 /* Validate index register.
2337 Don't allow SUBREG's here, it can lead to spill failures when the index
2338 is one word out of a two word structure, which is represented internally
2345 if (GET_CODE (index
) != REG
)
2347 reason
= "index is not a register";
2351 if (GET_MODE (index
) != Pmode
)
2353 reason
= "index is not in Pmode";
2357 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (index
))
2358 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (index
)))
2360 reason
= "index is not valid";
2365 /* Validate scale factor. */
2368 reason_rtx
= GEN_INT (scale
);
2371 reason
= "scale without index";
2375 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
2377 reason
= "scale is not a valid multiplier";
2382 /* Validate displacement. */
2387 if (!CONSTANT_ADDRESS_P (disp
))
2389 reason
= "displacement is not constant";
2393 if (GET_CODE (disp
) == CONST_DOUBLE
)
2395 reason
= "displacement is a const_double";
2399 if (flag_pic
&& SYMBOLIC_CONST (disp
))
2401 if (! legitimate_pic_address_disp_p (disp
))
2403 reason
= "displacement is an invalid pic construct";
2407 /* Verify that a symbolic pic displacement includes
2408 the pic_offset_table_rtx register. */
2409 if (base
!= pic_offset_table_rtx
2410 && (index
!= pic_offset_table_rtx
|| scale
!= 1))
2412 reason
= "pic displacement against invalid base";
2416 else if (HALF_PIC_P ())
2418 if (! HALF_PIC_ADDRESS_P (disp
)
2419 || (base
!= NULL_RTX
|| index
!= NULL_RTX
))
2421 reason
= "displacement is an invalid half-pic reference";
2427 /* Everything looks valid. */
2428 if (TARGET_DEBUG_ADDR
)
2429 fprintf (stderr
, "Success.\n");
2433 if (TARGET_DEBUG_ADDR
)
2435 fprintf (stderr
, "Error: %s\n", reason
);
2436 debug_rtx (reason_rtx
);
2441 /* Return a legitimate reference for ORIG (an address) using the
2442 register REG. If REG is 0, a new pseudo is generated.
2444 There are two types of references that must be handled:
2446 1. Global data references must load the address from the GOT, via
2447 the PIC reg. An insn is emitted to do this load, and the reg is
2450 2. Static data references, constant pool addresses, and code labels
2451 compute the address as an offset from the GOT, whose base is in
2452 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2453 differentiate them from global data objects. The returned
2454 address is the PIC reg + an unspec constant.
2456 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
2457 reg also appears in the address. */
2460 legitimize_pic_address (orig
, reg
)
2468 if (GET_CODE (addr
) == LABEL_REF
2469 || (GET_CODE (addr
) == SYMBOL_REF
2470 && (CONSTANT_POOL_ADDRESS_P (addr
)
2471 || SYMBOL_REF_FLAG (addr
))))
2473 /* This symbol may be referenced via a displacement from the PIC
2474 base address (@GOTOFF). */
2476 current_function_uses_pic_offset_table
= 1;
2477 new = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, addr
), 7);
2478 new = gen_rtx_CONST (VOIDmode
, new);
2479 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
2483 emit_move_insn (reg
, new);
2487 else if (GET_CODE (addr
) == SYMBOL_REF
)
2489 /* This symbol must be referenced via a load from the
2490 Global Offset Table (@GOT). */
2492 current_function_uses_pic_offset_table
= 1;
2493 new = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, addr
), 6);
2494 new = gen_rtx_CONST (VOIDmode
, new);
2495 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
2496 new = gen_rtx_MEM (Pmode
, new);
2497 RTX_UNCHANGING_P (new) = 1;
2500 reg
= gen_reg_rtx (Pmode
);
2501 emit_move_insn (reg
, new);
2506 if (GET_CODE (addr
) == CONST
)
2508 addr
= XEXP (addr
, 0);
2509 if (GET_CODE (addr
) == UNSPEC
)
2511 /* Check that the unspec is one of the ones we generate? */
2513 else if (GET_CODE (addr
) != PLUS
)
2516 if (GET_CODE (addr
) == PLUS
)
2518 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
2520 /* Check first to see if this is a constant offset from a @GOTOFF
2521 symbol reference. */
2522 if ((GET_CODE (op0
) == LABEL_REF
2523 || (GET_CODE (op0
) == SYMBOL_REF
2524 && (CONSTANT_POOL_ADDRESS_P (op0
)
2525 || SYMBOL_REF_FLAG (op0
))))
2526 && GET_CODE (op1
) == CONST_INT
)
2528 current_function_uses_pic_offset_table
= 1;
2529 new = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, op0
), 7);
2530 new = gen_rtx_PLUS (VOIDmode
, new, op1
);
2531 new = gen_rtx_CONST (VOIDmode
, new);
2532 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
2536 emit_move_insn (reg
, new);
2542 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
2543 new = legitimize_pic_address (XEXP (addr
, 1),
2544 base
== reg
? NULL_RTX
: reg
);
2546 if (GET_CODE (new) == CONST_INT
)
2547 new = plus_constant (base
, INTVAL (new));
2550 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
2552 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
2553 new = XEXP (new, 1);
2555 new = gen_rtx_PLUS (Pmode
, base
, new);
2563 /* Try machine-dependent ways of modifying an illegitimate address
2564 to be legitimate. If we find one, return the new, valid address.
2565 This macro is used in only one place: `memory_address' in explow.c.
2567 OLDX is the address as it was before break_out_memory_refs was called.
2568 In some cases it is useful to look at this to decide what needs to be done.
2570 MODE and WIN are passed so that this macro can use
2571 GO_IF_LEGITIMATE_ADDRESS.
2573 It is always safe for this macro to do nothing. It exists to recognize
2574 opportunities to optimize the output.
2576 For the 80386, we handle X+REG by loading X into a register R and
2577 using R+REG. R will go in a general reg and indexing will be used.
2578 However, if REG is a broken-out memory address or multiplication,
2579 nothing needs to be done because REG can certainly go in a general reg.
2581 When -fpic is used, special handling is needed for symbolic references.
2582 See comments by legitimize_pic_address in i386.c for details. */
2585 legitimize_address (x
, oldx
, mode
)
2587 register rtx oldx ATTRIBUTE_UNUSED
;
2588 enum machine_mode mode
;
2593 if (TARGET_DEBUG_ADDR
)
2595 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2596 GET_MODE_NAME (mode
));
2600 if (flag_pic
&& SYMBOLIC_CONST (x
))
2601 return legitimize_pic_address (x
, 0);
2603 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
2604 if (GET_CODE (x
) == ASHIFT
2605 && GET_CODE (XEXP (x
, 1)) == CONST_INT
2606 && (log
= (unsigned)exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
2609 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
2610 GEN_INT (1 << log
));
2613 if (GET_CODE (x
) == PLUS
)
2615 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2617 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
2618 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
2619 && (log
= (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
2622 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
2623 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
2624 GEN_INT (1 << log
));
2627 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
2628 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
2629 && (log
= (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
2632 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
2633 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
2634 GEN_INT (1 << log
));
2637 /* Put multiply first if it isn't already. */
2638 if (GET_CODE (XEXP (x
, 1)) == MULT
)
2640 rtx tmp
= XEXP (x
, 0);
2641 XEXP (x
, 0) = XEXP (x
, 1);
2646 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2647 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2648 created by virtual register instantiation, register elimination, and
2649 similar optimizations. */
2650 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
2653 x
= gen_rtx_PLUS (Pmode
,
2654 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
2655 XEXP (XEXP (x
, 1), 0)),
2656 XEXP (XEXP (x
, 1), 1));
2660 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
2661 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2662 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
2663 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
2664 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
2665 && CONSTANT_P (XEXP (x
, 1)))
2668 rtx other
= NULL_RTX
;
2670 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
2672 constant
= XEXP (x
, 1);
2673 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
2675 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
2677 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
2678 other
= XEXP (x
, 1);
2686 x
= gen_rtx_PLUS (Pmode
,
2687 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
2688 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
2689 plus_constant (other
, INTVAL (constant
)));
2693 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
2696 if (GET_CODE (XEXP (x
, 0)) == MULT
)
2699 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
2702 if (GET_CODE (XEXP (x
, 1)) == MULT
)
2705 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
2709 && GET_CODE (XEXP (x
, 1)) == REG
2710 && GET_CODE (XEXP (x
, 0)) == REG
)
2713 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
2716 x
= legitimize_pic_address (x
, 0);
2719 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
2722 if (GET_CODE (XEXP (x
, 0)) == REG
)
2724 register rtx temp
= gen_reg_rtx (Pmode
);
2725 register rtx val
= force_operand (XEXP (x
, 1), temp
);
2727 emit_move_insn (temp
, val
);
2733 else if (GET_CODE (XEXP (x
, 1)) == REG
)
2735 register rtx temp
= gen_reg_rtx (Pmode
);
2736 register rtx val
= force_operand (XEXP (x
, 0), temp
);
2738 emit_move_insn (temp
, val
);
2748 /* Print an integer constant expression in assembler syntax. Addition
2749 and subtraction are the only arithmetic that may appear in these
2750 expressions. FILE is the stdio stream to write to, X is the rtx, and
2751 CODE is the operand print code from the output string. */
2754 output_pic_addr_const (file
, x
, code
)
2761 switch (GET_CODE (x
))
2771 assemble_name (file
, XSTR (x
, 0));
2772 if (code
== 'P' && ! SYMBOL_REF_FLAG (x
))
2773 fputs ("@PLT", file
);
2780 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
2781 assemble_name (asm_out_file
, buf
);
2785 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
2789 /* This used to output parentheses around the expression,
2790 but that does not work on the 386 (either ATT or BSD assembler). */
2791 output_pic_addr_const (file
, XEXP (x
, 0), code
);
2795 if (GET_MODE (x
) == VOIDmode
)
2797 /* We can use %d if the number is <32 bits and positive. */
2798 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
2799 fprintf (file
, "0x%lx%08lx",
2800 (unsigned long) CONST_DOUBLE_HIGH (x
),
2801 (unsigned long) CONST_DOUBLE_LOW (x
));
2803 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
2806 /* We can't handle floating point constants;
2807 PRINT_OPERAND must handle them. */
2808 output_operand_lossage ("floating constant misused");
2812 /* Some assemblers need integer constants to appear first. */
2813 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
2815 output_pic_addr_const (file
, XEXP (x
, 0), code
);
2817 output_pic_addr_const (file
, XEXP (x
, 1), code
);
2819 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
2821 output_pic_addr_const (file
, XEXP (x
, 1), code
);
2823 output_pic_addr_const (file
, XEXP (x
, 0), code
);
2830 putc (ASSEMBLER_DIALECT
? '(' : '[', file
);
2831 output_pic_addr_const (file
, XEXP (x
, 0), code
);
2833 output_pic_addr_const (file
, XEXP (x
, 1), code
);
2834 putc (ASSEMBLER_DIALECT
? ')' : ']', file
);
2838 if (XVECLEN (x
, 0) != 1)
2840 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
2841 switch (XINT (x
, 1))
2844 fputs ("@GOT", file
);
2847 fputs ("@GOTOFF", file
);
2850 fputs ("@PLT", file
);
2853 output_operand_lossage ("invalid UNSPEC as operand");
2859 output_operand_lossage ("invalid expression as operand");
2863 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
2864 We need to handle our special PIC relocations. */
2867 i386_dwarf_output_addr_const (file
, x
)
2871 fprintf (file
, "\t%s\t", INT_ASM_OP
);
2873 output_pic_addr_const (file
, x
, '\0');
2875 output_addr_const (file
, x
);
2879 /* In the name of slightly smaller debug output, and to cater to
2880 general assembler losage, recognize PIC+GOTOFF and turn it back
2881 into a direct symbol reference. */
2884 i386_simplify_dwarf_addr (orig_x
)
2889 if (GET_CODE (x
) != PLUS
2890 || GET_CODE (XEXP (x
, 0)) != REG
2891 || GET_CODE (XEXP (x
, 1)) != CONST
)
2894 x
= XEXP (XEXP (x
, 1), 0);
2895 if (GET_CODE (x
) == UNSPEC
2896 && XINT (x
, 1) == 7)
2897 return XVECEXP (x
, 0, 0);
2899 if (GET_CODE (x
) == PLUS
2900 && GET_CODE (XEXP (x
, 0)) == UNSPEC
2901 && GET_CODE (XEXP (x
, 1)) == CONST_INT
2902 && XINT (XEXP (x
, 0), 1) == 7)
2903 return gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
2909 put_condition_code (code
, mode
, reverse
, fp
, file
)
2911 enum machine_mode mode
;
2918 code
= reverse_condition (code
);
2929 if (mode
== CCNOmode
)
2934 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
2935 Those same assemblers have the same but opposite losage on cmov. */
2936 suffix
= fp
? "nbe" : "a";
2939 if (mode
== CCNOmode
)
2948 if (mode
== CCNOmode
)
2955 suffix
= fp
? "nb" : "ae";
2958 if (mode
== CCNOmode
)
2968 fputs (suffix
, file
);
2972 print_reg (x
, code
, file
)
2977 if (REGNO (x
) == ARG_POINTER_REGNUM
2978 || REGNO (x
) == FRAME_POINTER_REGNUM
2979 || REGNO (x
) == FLAGS_REG
2980 || REGNO (x
) == FPSR_REG
)
2983 if (ASSEMBLER_DIALECT
== 0 || USER_LABEL_PREFIX
[0] == 0)
2988 else if (code
== 'b')
2990 else if (code
== 'k')
2992 else if (code
== 'y')
2994 else if (code
== 'h')
2997 code
= GET_MODE_SIZE (GET_MODE (x
));
3002 if (STACK_TOP_P (x
))
3004 fputs ("st(0)", file
);
3015 fputs (hi_reg_name
[REGNO (x
)], file
);
3018 fputs (qi_reg_name
[REGNO (x
)], file
);
3021 fputs (qi_high_reg_name
[REGNO (x
)], file
);
3029 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
3030 C -- print opcode suffix for set/cmov insn.
3031 c -- like C, but print reversed condition
3032 R -- print the prefix for register names.
3033 z -- print the opcode suffix for the size of the current operand.
3034 * -- print a star (in certain assembler syntax)
3035 w -- print the operand as if it's a "word" (HImode) even if it isn't.
3036 s -- print a shift double count, followed by the assemblers argument
3038 b -- print the QImode name of the register for the indicated operand.
3039 %b0 would print %al if operands[0] is reg 0.
3040 w -- likewise, print the HImode name of the register.
3041 k -- likewise, print the SImode name of the register.
3042 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
3043 y -- print "st(0)" instead of "st" as a register. */
3046 print_operand (file
, x
, code
)
3056 if (ASSEMBLER_DIALECT
== 0)
3061 if (ASSEMBLER_DIALECT
== 0)
3066 if (ASSEMBLER_DIALECT
== 0)
3071 if (ASSEMBLER_DIALECT
== 0)
3076 if (ASSEMBLER_DIALECT
== 0)
3081 if (ASSEMBLER_DIALECT
== 0)
3086 if (ASSEMBLER_DIALECT
== 0)
3091 /* 387 opcodes don't get size suffixes if the operands are
3094 if (STACK_REG_P (x
))
3097 /* Intel syntax has no truck with instruction suffixes. */
3098 if (ASSEMBLER_DIALECT
!= 0)
3101 /* this is the size of op from size of operand */
3102 switch (GET_MODE_SIZE (GET_MODE (x
)))
3113 if (GET_MODE (x
) == SFmode
)
3127 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
3129 #ifdef GAS_MNEMONICS
3151 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
3153 PRINT_OPERAND (file
, x
, 0);
3159 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
3162 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
3165 /* Like above, but reverse condition */
3167 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
3170 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
3176 sprintf (str
, "invalid operand code `%c'", code
);
3177 output_operand_lossage (str
);
3182 if (GET_CODE (x
) == REG
)
3184 PRINT_REG (x
, code
, file
);
3187 else if (GET_CODE (x
) == MEM
)
3189 /* No `byte ptr' prefix for call instructions. */
3190 if (ASSEMBLER_DIALECT
!= 0 && code
!= 'X' && code
!= 'P')
3193 switch (GET_MODE_SIZE (GET_MODE (x
)))
3195 case 1: size
= "BYTE"; break;
3196 case 2: size
= "WORD"; break;
3197 case 4: size
= "DWORD"; break;
3198 case 8: size
= "QWORD"; break;
3199 case 12: size
= "XWORD"; break;
3204 fputs (" PTR ", file
);
3208 if (flag_pic
&& CONSTANT_ADDRESS_P (x
))
3209 output_pic_addr_const (file
, x
, code
);
3214 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
3219 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3220 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
3222 if (ASSEMBLER_DIALECT
== 0)
3224 fprintf (file
, "0x%lx", l
);
3227 /* These float cases don't actually occur as immediate operands. */
3228 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
3233 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3234 REAL_VALUE_TO_DECIMAL (r
, "%.22e", dstr
);
3235 fprintf (file
, "%s", dstr
);
3238 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == XFmode
)
3243 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3244 REAL_VALUE_TO_DECIMAL (r
, "%.22e", dstr
);
3245 fprintf (file
, "%s", dstr
);
3251 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
3253 if (ASSEMBLER_DIALECT
== 0)
3256 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
3257 || GET_CODE (x
) == LABEL_REF
)
3259 if (ASSEMBLER_DIALECT
== 0)
3262 fputs ("OFFSET FLAT:", file
);
3265 if (GET_CODE (x
) == CONST_INT
)
3266 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
3268 output_pic_addr_const (file
, x
, code
);
3270 output_addr_const (file
, x
);
3274 /* Print a memory operand whose address is ADDR. */
3277 print_operand_address (file
, addr
)
3281 struct ix86_address parts
;
3282 rtx base
, index
, disp
;
3285 if (! ix86_decompose_address (addr
, &parts
))
3289 index
= parts
.index
;
3291 scale
= parts
.scale
;
3293 if (!base
&& !index
)
3295 /* Displacement only requires special attention. */
3297 if (GET_CODE (disp
) == CONST_INT
)
3299 if (ASSEMBLER_DIALECT
!= 0)
3300 fputs ("ds:", file
);
3301 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (addr
));
3304 output_pic_addr_const (file
, addr
, 0);
3306 output_addr_const (file
, addr
);
3310 if (ASSEMBLER_DIALECT
== 0)
3315 output_pic_addr_const (file
, disp
, 0);
3316 else if (GET_CODE (disp
) == LABEL_REF
)
3317 output_asm_label (disp
);
3319 output_addr_const (file
, disp
);
3324 PRINT_REG (base
, 0, file
);
3328 PRINT_REG (index
, 0, file
);
3330 fprintf (file
, ",%d", scale
);
3336 rtx offset
= NULL_RTX
;
3340 /* Pull out the offset of a symbol; print any symbol itself. */
3341 if (GET_CODE (disp
) == CONST
3342 && GET_CODE (XEXP (disp
, 0)) == PLUS
3343 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
3345 offset
= XEXP (XEXP (disp
, 0), 1);
3346 disp
= gen_rtx_CONST (VOIDmode
,
3347 XEXP (XEXP (disp
, 0), 0));
3351 output_pic_addr_const (file
, disp
, 0);
3352 else if (GET_CODE (disp
) == LABEL_REF
)
3353 output_asm_label (disp
);
3354 else if (GET_CODE (disp
) == CONST_INT
)
3357 output_addr_const (file
, disp
);
3363 PRINT_REG (base
, 0, file
);
3366 if (INTVAL (offset
) >= 0)
3368 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
3372 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
3379 PRINT_REG (index
, 0, file
);
3381 fprintf (file
, "*%d", scale
);
3388 /* Split one or more DImode RTL references into pairs of SImode
3389 references. The RTL can be REG, offsettable MEM, integer constant, or
3390 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3391 split and "num" is its length. lo_half and hi_half are output arrays
3392 that parallel "operands". */
3395 split_di (operands
, num
, lo_half
, hi_half
)
3398 rtx lo_half
[], hi_half
[];
3402 rtx op
= operands
[num
];
3403 if (CONSTANT_P (op
))
3404 split_double (op
, &lo_half
[num
], &hi_half
[num
]);
3405 else if (! reload_completed
)
3407 lo_half
[num
] = gen_lowpart (SImode
, op
);
3408 hi_half
[num
] = gen_highpart (SImode
, op
);
3410 else if (GET_CODE (op
) == REG
)
3412 lo_half
[num
] = gen_rtx_REG (SImode
, REGNO (op
));
3413 hi_half
[num
] = gen_rtx_REG (SImode
, REGNO (op
) + 1);
3415 else if (offsettable_memref_p (op
))
3417 rtx lo_addr
= XEXP (op
, 0);
3418 rtx hi_addr
= XEXP (adj_offsettable_operand (op
, 4), 0);
3419 lo_half
[num
] = change_address (op
, SImode
, lo_addr
);
3420 hi_half
[num
] = change_address (op
, SImode
, hi_addr
);
3427 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
3428 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3429 is the expression of the binary operation. The output may either be
3430 emitted here, or returned to the caller, like all output_* functions.
3432 There is no guarantee that the operands are the same mode, as they
3433 might be within FLOAT or FLOAT_EXTEND expressions. */
3436 output_387_binary_op (insn
, operands
)
3440 static char buf
[100];
3444 switch (GET_CODE (operands
[3]))
3447 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
3448 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
3455 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
3456 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
3463 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
3464 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
3471 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
3472 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
3484 switch (GET_CODE (operands
[3]))
3488 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
3491 operands
[2] = operands
[1];
3495 if (GET_CODE (operands
[2]) == MEM
)
3501 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
3503 if (STACK_TOP_P (operands
[0]))
3504 p
= "p\t{%0,%2|%2, %0}";
3506 p
= "p\t{%2,%0|%0, %2}";
3510 if (STACK_TOP_P (operands
[0]))
3511 p
= "\t{%y2,%0|%0, %y2}";
3513 p
= "\t{%2,%0|%0, %2}";
3518 if (GET_CODE (operands
[1]) == MEM
)
3524 if (GET_CODE (operands
[2]) == MEM
)
3530 if (! STACK_REG_P (operands
[1]) || ! STACK_REG_P (operands
[2]))
3533 /* Note that the Unixware assembler, and the AT&T assembler before
3534 that, are confusingly not reversed from Intel syntax in this
3536 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
3538 if (STACK_TOP_P (operands
[0]))
3545 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
3547 if (STACK_TOP_P (operands
[0]))
3554 if (STACK_TOP_P (operands
[0]))
3556 if (STACK_TOP_P (operands
[1]))
3562 else if (STACK_TOP_P (operands
[1]))
3576 /* Output code for INSN to convert a float to a signed int. OPERANDS
3577 are the insn operands. The output may be [SD]Imode and the input
3578 operand may be [SDX]Fmode. */
3581 output_fix_trunc (insn
, operands
)
3585 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
3586 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
3589 /* Jump through a hoop or two for DImode, since the hardware has no
3590 non-popping instruction. We used to do this a different way, but
3591 that was somewhat fragile and broke with post-reload splitters. */
3592 if (dimode_p
&& !stack_top_dies
)
3593 output_asm_insn ("fld\t%y1", operands
);
3595 if (! STACK_TOP_P (operands
[1]))
3598 xops
[0] = GEN_INT (12);
3599 xops
[1] = adj_offsettable_operand (operands
[2], 1);
3600 xops
[1] = change_address (xops
[1], QImode
, NULL_RTX
);
3602 xops
[2] = operands
[0];
3603 if (GET_CODE (operands
[0]) != MEM
)
3604 xops
[2] = operands
[3];
3606 output_asm_insn ("fnstcw\t%2", operands
);
3607 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands
);
3608 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops
);
3609 output_asm_insn ("fldcw\t%2", operands
);
3610 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands
);
3612 if (stack_top_dies
|| dimode_p
)
3613 output_asm_insn ("fistp%z2\t%2", xops
);
3615 output_asm_insn ("fist%z2\t%2", xops
);
3617 output_asm_insn ("fldcw\t%2", operands
);
3619 if (GET_CODE (operands
[0]) != MEM
)
3623 split_di (operands
+0, 1, xops
+0, xops
+1);
3624 split_di (operands
+3, 1, xops
+2, xops
+3);
3625 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
3626 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops
);
3629 output_asm_insn ("mov{l}\t{%3,%0|%0, %3}", operands
);
3635 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
3636 should be used and 2 when fnstsw should be used. UNORDERED_P is true
3637 when fucom should be used. */
3640 output_fp_compare (insn
, operands
, eflags_p
, unordered_p
)
3643 int eflags_p
, unordered_p
;
3646 rtx cmp_op0
= operands
[0];
3647 rtx cmp_op1
= operands
[1];
3652 cmp_op1
= operands
[2];
3655 if (! STACK_TOP_P (cmp_op0
))
3658 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
3660 if (STACK_REG_P (cmp_op1
)
3662 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
3663 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
3665 /* If both the top of the 387 stack dies, and the other operand
3666 is also a stack register that dies, then this must be a
3667 `fcompp' float compare */
3671 /* There is no double popping fcomi variant. Fortunately,
3672 eflags is immune from the fstp's cc clobbering. */
3674 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
3676 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
3684 return "fucompp\n\tfnstsw\t%0";
3686 return "fcompp\n\tfnstsw\t%0";
3699 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
3701 static const char * const alt
[24] =
3713 "fcomi\t{%y1, %0|%0, %y1}",
3714 "fcomip\t{%y1, %0|%0, %y1}",
3715 "fucomi\t{%y1, %0|%0, %y1}",
3716 "fucomip\t{%y1, %0|%0, %y1}",
3723 "fcom%z2\t%y2\n\tfnstsw\t%0",
3724 "fcomp%z2\t%y2\n\tfnstsw\t%0",
3725 "fucom%z2\t%y2\n\tfnstsw\t%0",
3726 "fucomp%z2\t%y2\n\tfnstsw\t%0",
3728 "ficom%z2\t%y2\n\tfnstsw\t%0",
3729 "ficomp%z2\t%y2\n\tfnstsw\t%0",
3737 mask
= eflags_p
<< 3;
3738 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
3739 mask
|= unordered_p
<< 1;
3740 mask
|= stack_top_dies
;
3752 /* Output assembler code to FILE to initialize basic-block profiling.
3754 If profile_block_flag == 2
3756 Output code to call the subroutine `__bb_init_trace_func'
3757 and pass two parameters to it. The first parameter is
3758 the address of a block allocated in the object module.
3759 The second parameter is the number of the first basic block
3762 The name of the block is a local symbol made with this statement:
3764 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
3766 Of course, since you are writing the definition of
3767 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3768 can take a short cut in the definition of this macro and use the
3769 name that you know will result.
3771 The number of the first basic block of the function is
3772 passed to the macro in BLOCK_OR_LABEL.
3774 If described in a virtual assembler language the code to be
3778 parameter2 <- BLOCK_OR_LABEL
3779 call __bb_init_trace_func
3781 else if profile_block_flag != 0
3783 Output code to call the subroutine `__bb_init_func'
3784 and pass one single parameter to it, which is the same
3785 as the first parameter to `__bb_init_trace_func'.
3787 The first word of this parameter is a flag which will be nonzero if
3788 the object module has already been initialized. So test this word
3789 first, and do not call `__bb_init_func' if the flag is nonzero.
3790 Note: When profile_block_flag == 2 the test need not be done
3791 but `__bb_init_trace_func' *must* be called.
3793 BLOCK_OR_LABEL may be used to generate a label number as a
3794 branch destination in case `__bb_init_func' will not be called.
3796 If described in a virtual assembler language the code to be
3807 ix86_output_function_block_profiler (file
, block_or_label
)
3811 static int num_func
= 0;
3813 char block_table
[80], false_label
[80];
3815 ASM_GENERATE_INTERNAL_LABEL (block_table
, "LPBX", 0);
3817 xops
[1] = gen_rtx_SYMBOL_REF (VOIDmode
, block_table
);
3818 xops
[5] = stack_pointer_rtx
;
3819 xops
[7] = gen_rtx_REG (Pmode
, 0); /* eax */
3821 CONSTANT_POOL_ADDRESS_P (xops
[1]) = TRUE
;
3823 switch (profile_block_flag
)
3826 xops
[2] = GEN_INT (block_or_label
);
3827 xops
[3] = gen_rtx_MEM (Pmode
,
3828 gen_rtx_SYMBOL_REF (VOIDmode
, "__bb_init_trace_func"));
3829 xops
[6] = GEN_INT (8);
3831 output_asm_insn ("push{l}\t%2", xops
);
3833 output_asm_insn ("push{l}\t%1", xops
);
3836 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops
);
3837 output_asm_insn ("push{l}\t%7", xops
);
3839 output_asm_insn ("call\t%P3", xops
);
3840 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops
);
3844 ASM_GENERATE_INTERNAL_LABEL (false_label
, "LPBZ", num_func
);
3846 xops
[0] = const0_rtx
;
3847 xops
[2] = gen_rtx_MEM (Pmode
,
3848 gen_rtx_SYMBOL_REF (VOIDmode
, false_label
));
3849 xops
[3] = gen_rtx_MEM (Pmode
,
3850 gen_rtx_SYMBOL_REF (VOIDmode
, "__bb_init_func"));
3851 xops
[4] = gen_rtx_MEM (Pmode
, xops
[1]);
3852 xops
[6] = GEN_INT (4);
3854 CONSTANT_POOL_ADDRESS_P (xops
[2]) = TRUE
;
3856 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops
);
3857 output_asm_insn ("jne\t%2", xops
);
3860 output_asm_insn ("push{l}\t%1", xops
);
3863 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops
);
3864 output_asm_insn ("push{l}\t%7", xops
);
3866 output_asm_insn ("call\t%P3", xops
);
3867 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops
);
3868 ASM_OUTPUT_INTERNAL_LABEL (file
, "LPBZ", num_func
);
3874 /* Output assembler code to FILE to increment a counter associated
3875 with basic block number BLOCKNO.
3877 If profile_block_flag == 2
3879 Output code to initialize the global structure `__bb' and
3880 call the function `__bb_trace_func' which will increment the
3883 `__bb' consists of two words. In the first word the number
3884 of the basic block has to be stored. In the second word
3885 the address of a block allocated in the object module
3888 The basic block number is given by BLOCKNO.
3890 The address of the block is given by the label created with
3892 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
3894 by FUNCTION_BLOCK_PROFILER.
3896 Of course, since you are writing the definition of
3897 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3898 can take a short cut in the definition of this macro and use the
3899 name that you know will result.
3901 If described in a virtual assembler language the code to be
3904 move BLOCKNO -> (__bb)
3905 move LPBX0 -> (__bb+4)
3906 call __bb_trace_func
3908 Note that function `__bb_trace_func' must not change the
3909 machine state, especially the flag register. To grant
3910 this, you must output code to save and restore registers
3911 either in this macro or in the macros MACHINE_STATE_SAVE
3912 and MACHINE_STATE_RESTORE. The last two macros will be
3913 used in the function `__bb_trace_func', so you must make
3914 sure that the function prologue does not change any
3915 register prior to saving it with MACHINE_STATE_SAVE.
3917 else if profile_block_flag != 0
3919 Output code to increment the counter directly.
3920 Basic blocks are numbered separately from zero within each
3921 compiled object module. The count associated with block number
3922 BLOCKNO is at index BLOCKNO in an array of words; the name of
3923 this array is a local symbol made with this statement:
3925 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
3927 Of course, since you are writing the definition of
3928 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3929 can take a short cut in the definition of this macro and use the
3930 name that you know will result.
3932 If described in a virtual assembler language the code to be
3935 inc (LPBX2+4*BLOCKNO)
3939 ix86_output_block_profiler (file
, blockno
)
3940 FILE *file ATTRIBUTE_UNUSED
;
3943 rtx xops
[8], cnt_rtx
;
3945 char *block_table
= counts
;
3947 switch (profile_block_flag
)
3950 ASM_GENERATE_INTERNAL_LABEL (block_table
, "LPBX", 0);
3952 xops
[1] = gen_rtx_SYMBOL_REF (VOIDmode
, block_table
);
3953 xops
[2] = GEN_INT (blockno
);
3954 xops
[3] = gen_rtx_MEM (Pmode
,
3955 gen_rtx_SYMBOL_REF (VOIDmode
, "__bb_trace_func"));
3956 xops
[4] = gen_rtx_SYMBOL_REF (VOIDmode
, "__bb");
3957 xops
[5] = plus_constant (xops
[4], 4);
3958 xops
[0] = gen_rtx_MEM (SImode
, xops
[4]);
3959 xops
[6] = gen_rtx_MEM (SImode
, xops
[5]);
3961 CONSTANT_POOL_ADDRESS_P (xops
[1]) = TRUE
;
3963 output_asm_insn ("pushf", xops
);
3964 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
3967 xops
[7] = gen_rtx_REG (Pmode
, 0); /* eax */
3968 output_asm_insn ("push{l}\t%7", xops
);
3969 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops
);
3970 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops
);
3971 output_asm_insn ("pop{l}\t%7", xops
);
3974 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops
);
3975 output_asm_insn ("call\t%P3", xops
);
3976 output_asm_insn ("popf", xops
);
3981 ASM_GENERATE_INTERNAL_LABEL (counts
, "LPBX", 2);
3982 cnt_rtx
= gen_rtx_SYMBOL_REF (VOIDmode
, counts
);
3983 SYMBOL_REF_FLAG (cnt_rtx
) = TRUE
;
3986 cnt_rtx
= plus_constant (cnt_rtx
, blockno
*4);
3989 cnt_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, cnt_rtx
);
3991 xops
[0] = gen_rtx_MEM (SImode
, cnt_rtx
);
3992 output_asm_insn ("inc{l}\t%0", xops
);
3999 ix86_expand_move (mode
, operands
)
4000 enum machine_mode mode
;
4003 int strict
= (reload_in_progress
|| reload_completed
);
4006 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (operands
[1], Pmode
))
4008 /* Emit insns to move operands[1] into operands[0]. */
4010 if (GET_CODE (operands
[0]) == MEM
)
4011 operands
[1] = force_reg (Pmode
, operands
[1]);
4014 rtx temp
= operands
[0];
4015 if (GET_CODE (temp
) != REG
)
4016 temp
= gen_reg_rtx (Pmode
);
4017 temp
= legitimize_pic_address (operands
[1], temp
);
4018 if (temp
== operands
[0])
4025 if (GET_CODE (operands
[0]) == MEM
4026 && (GET_MODE (operands
[0]) == QImode
4027 || !push_operand (operands
[0], mode
))
4028 && GET_CODE (operands
[1]) == MEM
)
4029 operands
[1] = force_reg (mode
, operands
[1]);
4031 if (push_operand (operands
[0], mode
)
4032 && ! general_no_elim_operand (operands
[1], mode
))
4033 operands
[1] = copy_to_mode_reg (mode
, operands
[1]);
4035 if (FLOAT_MODE_P (mode
))
4037 /* If we are loading a floating point constant to a register,
4038 force the value to memory now, since we'll get better code
4039 out the back end. */
4043 else if (GET_CODE (operands
[1]) == CONST_DOUBLE
4044 && register_operand (operands
[0], mode
))
4045 operands
[1] = validize_mem (force_const_mem (mode
, operands
[1]));
4049 insn
= gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]);
4054 /* Attempt to expand a binary operator. Make the expansion closer to the
4055 actual machine, then just general_operand, which will allow 3 separate
4056 memory references (one output, two input) in a single insn. */
4059 ix86_expand_binary_operator (code
, mode
, operands
)
4061 enum machine_mode mode
;
4064 int matching_memory
;
4065 rtx src1
, src2
, dst
, op
, clob
;
4071 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
4072 if (GET_RTX_CLASS (code
) == 'c'
4073 && (rtx_equal_p (dst
, src2
)
4074 || immediate_operand (src1
, mode
)))
4081 /* If the destination is memory, and we do not have matching source
4082 operands, do things in registers. */
4083 matching_memory
= 0;
4084 if (GET_CODE (dst
) == MEM
)
4086 if (rtx_equal_p (dst
, src1
))
4087 matching_memory
= 1;
4088 else if (GET_RTX_CLASS (code
) == 'c'
4089 && rtx_equal_p (dst
, src2
))
4090 matching_memory
= 2;
4092 dst
= gen_reg_rtx (mode
);
4095 /* Both source operands cannot be in memory. */
4096 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
4098 if (matching_memory
!= 2)
4099 src2
= force_reg (mode
, src2
);
4101 src1
= force_reg (mode
, src1
);
4104 /* If the operation is not commutable, source 1 cannot be a constant
4105 or non-matching memory. */
4106 if ((CONSTANT_P (src1
)
4107 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
4108 && GET_RTX_CLASS (code
) != 'c')
4109 src1
= force_reg (mode
, src1
);
4111 /* If optimizing, copy to regs to improve CSE */
4112 if (optimize
&& !reload_in_progress
&& !reload_completed
)
4114 if (GET_CODE (dst
) == MEM
)
4115 dst
= gen_reg_rtx (mode
);
4116 if (GET_CODE (src1
) == MEM
)
4117 src1
= force_reg (mode
, src1
);
4118 if (GET_CODE (src2
) == MEM
)
4119 src2
= force_reg (mode
, src2
);
4122 /* Emit the instruction. */
4124 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
4125 if (reload_in_progress
)
4127 /* Reload doesn't know about the flags register, and doesn't know that
4128 it doesn't want to clobber it. We can only do this with PLUS. */
4135 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
4136 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
4139 /* Fix up the destination if needed. */
4140 if (dst
!= operands
[0])
4141 emit_move_insn (operands
[0], dst
);
4144 /* Return TRUE or FALSE depending on whether the binary operator meets the
4145 appropriate constraints. */
4148 ix86_binary_operator_ok (code
, mode
, operands
)
4150 enum machine_mode mode ATTRIBUTE_UNUSED
;
4153 /* Both source operands cannot be in memory. */
4154 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
4156 /* If the operation is not commutable, source 1 cannot be a constant. */
4157 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != 'c')
4159 /* If the destination is memory, we must have a matching source operand. */
4160 if (GET_CODE (operands
[0]) == MEM
4161 && ! (rtx_equal_p (operands
[0], operands
[1])
4162 || (GET_RTX_CLASS (code
) == 'c'
4163 && rtx_equal_p (operands
[0], operands
[2]))))
4165 /* If the operation is not commutable and the source 1 is memory, we must
4166 have a matching destionation. */
4167 if (GET_CODE (operands
[1]) == MEM
4168 && GET_RTX_CLASS (code
) != 'c'
4169 && ! rtx_equal_p (operands
[0], operands
[1]))
4174 /* Attempt to expand a unary operator. Make the expansion closer to the
4175 actual machine, then just general_operand, which will allow 2 separate
4176 memory references (one output, one input) in a single insn. */
4179 ix86_expand_unary_operator (code
, mode
, operands
)
4181 enum machine_mode mode
;
4184 int matching_memory
;
4185 rtx src
, dst
, op
, clob
;
4190 /* If the destination is memory, and we do not have matching source
4191 operands, do things in registers. */
4192 matching_memory
= 0;
4193 if (GET_CODE (dst
) == MEM
)
4195 if (rtx_equal_p (dst
, src
))
4196 matching_memory
= 1;
4198 dst
= gen_reg_rtx (mode
);
4201 /* When source operand is memory, destination must match. */
4202 if (!matching_memory
&& GET_CODE (src
) == MEM
)
4203 src
= force_reg (mode
, src
);
4205 /* If optimizing, copy to regs to improve CSE */
4206 if (optimize
&& !reload_in_progress
&& !reload_completed
)
4208 if (GET_CODE (dst
) == MEM
)
4209 dst
= gen_reg_rtx (mode
);
4210 if (GET_CODE (src
) == MEM
)
4211 src
= force_reg (mode
, src
);
4214 /* Emit the instruction. */
4216 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
4217 if (reload_in_progress
|| code
== NOT
)
4219 /* Reload doesn't know about the flags register, and doesn't know that
4220 it doesn't want to clobber it. */
4227 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
4228 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
4231 /* Fix up the destination if needed. */
4232 if (dst
!= operands
[0])
4233 emit_move_insn (operands
[0], dst
);
4236 /* Return TRUE or FALSE depending on whether the unary operator meets the
4237 appropriate constraints. */
4240 ix86_unary_operator_ok (code
, mode
, operands
)
4241 enum rtx_code code ATTRIBUTE_UNUSED
;
4242 enum machine_mode mode ATTRIBUTE_UNUSED
;
4243 rtx operands
[2] ATTRIBUTE_UNUSED
;
4245 /* If one of operands is memory, source and destination must match. */
4246 if ((GET_CODE (operands
[0]) == MEM
4247 || GET_CODE (operands
[1]) == MEM
)
4248 && ! rtx_equal_p (operands
[0], operands
[1]))
4253 /* Produce an unsigned comparison for a given signed comparison. */
4255 static enum rtx_code
4256 unsigned_comparison (code
)
4286 /* Generate insn patterns to do an integer compare of OPERANDS. */
4289 ix86_expand_int_compare (code
, op0
, op1
)
4293 enum machine_mode cmpmode
;
4296 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
4297 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
4299 /* This is very simple, but making the interface the same as in the
4300 FP case makes the rest of the code easier. */
4301 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
4302 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
4304 /* Return the test that should be put into the flags user, i.e.
4305 the bcc, scc, or cmov instruction. */
4306 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
4309 /* Generate insn patterns to do a floating point compare of OPERANDS.
4310 If UNORDERED, allow for unordered compares. */
4313 ix86_expand_fp_compare (code
, op0
, op1
, unordered
)
4318 enum machine_mode fpcmp_mode
;
4319 enum machine_mode intcmp_mode
;
4322 /* When not doing IEEE compliant compares, disable unordered. */
4323 if (! TARGET_IEEE_FP
)
4325 fpcmp_mode
= unordered
? CCFPUmode
: CCFPmode
;
4327 /* ??? If we knew whether invalid-operand exceptions were masked,
4328 we could rely on fcom to raise an exception and take care of
4329 NaNs. But we don't. We could know this from c9x math bits. */
4333 /* All of the unordered compare instructions only work on registers.
4334 The same is true of the XFmode compare instructions. */
4335 if (unordered
|| GET_MODE (op0
) == XFmode
)
4337 op0
= force_reg (GET_MODE (op0
), op0
);
4338 op1
= force_reg (GET_MODE (op1
), op1
);
4342 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
4343 things around if they appear profitable, otherwise force op0
4346 if (standard_80387_constant_p (op0
) == 0
4347 || (GET_CODE (op0
) == MEM
4348 && ! (standard_80387_constant_p (op1
) == 0
4349 || GET_CODE (op1
) == MEM
)))
4352 tmp
= op0
, op0
= op1
, op1
= tmp
;
4353 code
= swap_condition (code
);
4356 if (GET_CODE (op0
) != REG
)
4357 op0
= force_reg (GET_MODE (op0
), op0
);
4359 if (CONSTANT_P (op1
))
4361 if (standard_80387_constant_p (op1
))
4362 op1
= force_reg (GET_MODE (op1
), op1
);
4364 op1
= validize_mem (force_const_mem (GET_MODE (op1
), op1
));
4368 /* %%% fcomi is probably always faster, even when dealing with memory,
4369 since compare-and-branch would be three insns instead of four. */
4370 if (TARGET_CMOVE
&& !unordered
)
4372 if (GET_CODE (op0
) != REG
)
4373 op0
= force_reg (GET_MODE (op0
), op0
);
4374 if (GET_CODE (op1
) != REG
)
4375 op1
= force_reg (GET_MODE (op1
), op1
);
4377 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
4378 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
), tmp
);
4381 /* The FP codes work out to act like unsigned. */
4382 code
= unsigned_comparison (code
);
4383 intcmp_mode
= fpcmp_mode
;
4387 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
4390 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
4391 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), 9);
4392 tmp
= gen_reg_rtx (HImode
);
4393 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, tmp2
));
4397 /* We have two options here -- use sahf, or testing bits of ah
4398 directly. On PPRO, they are equivalent, sahf being one byte
4399 smaller. On Pentium, sahf is non-pairable while test is UV
4402 if (TARGET_USE_SAHF
|| optimize_size
)
4406 /* The FP codes work out to act like unsigned. */
4407 code
= unsigned_comparison (code
);
4408 emit_insn (gen_x86_sahf_1 (tmp
));
4409 intcmp_mode
= CCmode
;
4414 * The numbers below correspond to the bits of the FPSW in AH.
4415 * C3, C2, and C0 are in bits 0x40, 0x4, and 0x01 respectively.
4437 /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
4438 faster in all cases to just fall back on sahf. */
4456 emit_insn (gen_testqi_ext_0 (tmp
, GEN_INT (mask
)));
4457 intcmp_mode
= CCNOmode
;
4462 /* In the unordered case, we have to check C2 for NaN's, which
4463 doesn't happen to work out to anything nice combination-wise.
4464 So do some bit twiddling on the value we've got in AH to come
4465 up with an appropriate set of condition codes. */
4467 intcmp_mode
= CCNOmode
;
4471 emit_insn (gen_testqi_ext_0 (tmp
, GEN_INT (0x45)));
4475 emit_insn (gen_andqi_ext_0 (tmp
, tmp
, GEN_INT (0x45)));
4476 emit_insn (gen_cmpqi_ext_3 (tmp
, GEN_INT (0x01)));
4477 intcmp_mode
= CCmode
;
4481 emit_insn (gen_testqi_ext_0 (tmp
, GEN_INT (0x05)));
4485 emit_insn (gen_andqi_ext_0 (tmp
, tmp
, GEN_INT (0x45)));
4486 emit_insn (gen_addqi_ext_1 (tmp
, tmp
, constm1_rtx
));
4487 emit_insn (gen_cmpqi_ext_3 (tmp
, GEN_INT (0x40)));
4488 intcmp_mode
= CCmode
;
4492 emit_insn (gen_andqi_ext_0 (tmp
, tmp
, GEN_INT (0x45)));
4493 emit_insn (gen_cmpqi_ext_3 (tmp
, GEN_INT (0x40)));
4494 intcmp_mode
= CCmode
;
4498 emit_insn (gen_andqi_ext_0 (tmp
, tmp
, GEN_INT (0x45)));
4499 emit_insn (gen_xorqi_cc_ext_1 (tmp
, tmp
, GEN_INT (0x40)));
4508 /* Return the test that should be put into the flags user, i.e.
4509 the bcc, scc, or cmov instruction. */
4510 return gen_rtx_fmt_ee (code
, VOIDmode
,
4511 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
4516 ix86_expand_compare (code
, unordered
)
4521 op0
= ix86_compare_op0
;
4522 op1
= ix86_compare_op1
;
4524 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
4525 ret
= ix86_expand_fp_compare (code
, op0
, op1
, unordered
);
4527 ret
= ix86_expand_int_compare (code
, op0
, op1
);
4533 ix86_expand_branch (code
, unordered
, label
)
4538 rtx tmp
, lo
[2], hi
[2], label2
;
4539 enum rtx_code code1
, code2
, code3
;
4541 if (GET_MODE (ix86_compare_op0
) != DImode
)
4543 tmp
= ix86_expand_compare (code
, unordered
);
4544 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4545 gen_rtx_LABEL_REF (VOIDmode
, label
),
4547 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4551 /* Expand DImode branch into multiple compare+branch. */
4553 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
4555 tmp
= ix86_compare_op0
;
4556 ix86_compare_op0
= ix86_compare_op1
;
4557 ix86_compare_op1
= tmp
;
4558 code
= swap_condition (code
);
4560 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
4561 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
4563 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to avoid
4564 two branches. This costs one extra insn, so disable when optimizing
4567 if ((code
== EQ
|| code
== NE
)
4569 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
4574 if (hi
[1] != const0_rtx
)
4576 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
4577 NULL_RTX
, 0, OPTAB_WIDEN
);
4581 if (lo
[1] != const0_rtx
)
4583 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
4584 NULL_RTX
, 0, OPTAB_WIDEN
);
4587 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
4588 NULL_RTX
, 0, OPTAB_WIDEN
);
4590 ix86_compare_op0
= tmp
;
4591 ix86_compare_op1
= const0_rtx
;
4592 ix86_expand_branch (code
, unordered
, label
);
4596 /* Otherwise, if we are doing less-than, op1 is a constant and the
4597 low word is zero, then we can just examine the high word. */
4599 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
4600 && (code
== LT
|| code
== LTU
))
4602 ix86_compare_op0
= hi
[0];
4603 ix86_compare_op1
= hi
[1];
4604 ix86_expand_branch (code
, unordered
, label
);
4608 /* Otherwise, we need two or three jumps. */
4610 label2
= gen_label_rtx ();
4613 code2
= swap_condition (code
);
4614 code3
= unsigned_condition (code
);
4618 case LT
: case GT
: case LTU
: case GTU
:
4621 case LE
: code1
= LT
; code2
= GT
; break;
4622 case GE
: code1
= GT
; code2
= LT
; break;
4623 case LEU
: code1
= LTU
; code2
= GTU
; break;
4624 case GEU
: code1
= GTU
; code2
= LTU
; break;
4626 case EQ
: code1
= NIL
; code2
= NE
; break;
4627 case NE
: code2
= NIL
; break;
4635 * if (hi(a) < hi(b)) goto true;
4636 * if (hi(a) > hi(b)) goto false;
4637 * if (lo(a) < lo(b)) goto true;
4641 ix86_compare_op0
= hi
[0];
4642 ix86_compare_op1
= hi
[1];
4645 ix86_expand_branch (code1
, unordered
, label
);
4647 ix86_expand_branch (code2
, unordered
, label2
);
4649 ix86_compare_op0
= lo
[0];
4650 ix86_compare_op1
= lo
[1];
4651 ix86_expand_branch (code3
, unordered
, label
);
4654 emit_label (label2
);
4658 ix86_expand_setcc (code
, unordered
, dest
)
4666 if (GET_MODE (ix86_compare_op0
) == DImode
)
4667 return 0; /* FAIL */
4669 /* Three modes of generation:
4670 0 -- destination does not overlap compare sources:
4671 clear dest first, emit strict_low_part setcc.
4672 1 -- destination does overlap compare sources:
4673 emit subreg setcc, zero extend.
4674 2 -- destination is in QImode:
4680 if (GET_MODE (dest
) == QImode
)
4682 else if (reg_overlap_mentioned_p (dest
, ix86_compare_op0
)
4683 || reg_overlap_mentioned_p (dest
, ix86_compare_op1
))
4687 emit_move_insn (dest
, const0_rtx
);
4689 ret
= ix86_expand_compare (code
, unordered
);
4690 PUT_MODE (ret
, QImode
);
4695 tmp
= gen_lowpart (QImode
, dest
);
4696 tmp
= gen_rtx_STRICT_LOW_PART (VOIDmode
, tmp
);
4700 if (!cse_not_expected
)
4701 tmp
= gen_reg_rtx (QImode
);
4703 tmp
= gen_lowpart (QImode
, dest
);
4706 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
4712 tmp
= gen_rtx_ZERO_EXTEND (GET_MODE (dest
), tmp
);
4713 tmp
= gen_rtx_SET (VOIDmode
, dest
, tmp
);
4714 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
4715 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
4719 return 1; /* DONE */
4723 ix86_expand_int_movcc (operands
)
4726 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
4727 rtx compare_seq
, compare_op
;
4729 /* When the compare code is not LTU or GEU, we can not use sbbl case.
4730 In case comparsion is done with immediate, we can convert it to LTU or
4731 GEU by altering the integer. */
4733 if ((code
== LEU
|| code
== GTU
)
4734 && GET_CODE (ix86_compare_op1
) == CONST_INT
4735 && GET_MODE (operands
[0]) != HImode
4736 && (unsigned int)INTVAL (ix86_compare_op1
) != 0xffffffff
4737 && GET_CODE (operands
[2]) == CONST_INT
4738 && GET_CODE (operands
[3]) == CONST_INT
)
4744 ix86_compare_op1
= GEN_INT (INTVAL (ix86_compare_op1
) + 1);
4747 compare_op
= ix86_expand_compare (code
, code
== EQ
|| code
== NE
);
4748 compare_seq
= gen_sequence ();
4751 compare_code
= GET_CODE (compare_op
);
4753 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
4754 HImode insns, we'd be swallowed in word prefix ops. */
4756 if (GET_MODE (operands
[0]) != HImode
4757 && GET_CODE (operands
[2]) == CONST_INT
4758 && GET_CODE (operands
[3]) == CONST_INT
)
4760 rtx out
= operands
[0];
4761 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
4762 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
4765 if (compare_code
== LTU
|| compare_code
== GEU
)
4768 /* Detect overlap between destination and compare sources. */
4771 /* To simplify rest of code, restrict to the GEU case. */
4772 if (compare_code
== LTU
)
4777 compare_code
= reverse_condition (compare_code
);
4778 code
= reverse_condition (code
);
4782 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
4783 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
4784 tmp
= gen_reg_rtx (SImode
);
4786 emit_insn (compare_seq
);
4787 emit_insn (gen_x86_movsicc_0_m1 (tmp
));
4799 emit_insn (gen_addsi3 (out
, out
, GEN_INT (ct
)));
4810 emit_insn (gen_iorsi3 (out
, out
, GEN_INT (ct
)));
4812 else if (diff
== -1 && ct
)
4822 emit_insn (gen_one_cmplsi2 (tmp
, tmp
));
4824 emit_insn (gen_addsi3 (out
, out
, GEN_INT (cf
)));
4831 * andl cf - ct, dest
4836 emit_insn (gen_andsi3 (out
, out
, GEN_INT (cf
- ct
)));
4838 emit_insn (gen_addsi3 (out
, out
, GEN_INT (ct
)));
4842 emit_move_insn (out
, tmp
);
4844 return 1; /* DONE */
4851 tmp
= ct
, ct
= cf
, cf
= tmp
;
4853 compare_code
= reverse_condition (compare_code
);
4854 code
= reverse_condition (code
);
4856 if (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
4857 || diff
== 3 || diff
== 5 || diff
== 9)
4863 * lea cf(dest*(ct-cf)),dest
4867 * This also catches the degenerate setcc-only case.
4873 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
4874 ix86_compare_op1
, VOIDmode
, 0, 1);
4881 tmp
= gen_rtx_MULT (SImode
, out
, GEN_INT (diff
& ~1));
4885 tmp
= gen_rtx_PLUS (SImode
, tmp
, out
);
4891 tmp
= gen_rtx_PLUS (SImode
, tmp
, GEN_INT (cf
));
4897 emit_move_insn (out
, tmp
);
4902 clob
= gen_rtx_REG (CCmode
, FLAGS_REG
);
4903 clob
= gen_rtx_CLOBBER (VOIDmode
, clob
);
4905 tmp
= gen_rtx_SET (VOIDmode
, out
, tmp
);
4906 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
4910 emit_insn (gen_rtx_SET (VOIDmode
, out
, tmp
));
4912 if (out
!= operands
[0])
4913 emit_move_insn (operands
[0], out
);
4915 return 1; /* DONE */
4919 * General case: Jumpful:
4920 * xorl dest,dest cmpl op1, op2
4921 * cmpl op1, op2 movl ct, dest
4923 * decl dest movl cf, dest
4924 * andl (cf-ct),dest 1:
4929 * This is reasonably steep, but branch mispredict costs are
4930 * high on modern cpus, so consider failing only if optimizing
4933 * %%% Parameterize branch_cost on the tuning architecture, then
4934 * use that. The 80386 couldn't care less about mispredicts.
4937 if (!optimize_size
&& !TARGET_CMOVE
)
4943 compare_code
= reverse_condition (compare_code
);
4944 code
= reverse_condition (code
);
4947 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
4948 ix86_compare_op1
, VOIDmode
, 0, 1);
4950 emit_insn (gen_addsi3 (out
, out
, constm1_rtx
));
4951 emit_insn (gen_andsi3 (out
, out
, GEN_INT (cf
-ct
)));
4953 emit_insn (gen_addsi3 (out
, out
, GEN_INT (ct
)));
4954 if (out
!= operands
[0])
4955 emit_move_insn (operands
[0], out
);
4957 return 1; /* DONE */
4963 /* Try a few things more with specific constants and a variable. */
4966 rtx var
, orig_out
, out
, tmp
;
4969 return 0; /* FAIL */
4971 /* If one of the two operands is an interesting constant, load a
4972 constant with the above and mask it in with a logical operation. */
4974 if (GET_CODE (operands
[2]) == CONST_INT
)
4977 if (INTVAL (operands
[2]) == 0)
4978 operands
[3] = constm1_rtx
, op
= and_optab
;
4979 else if (INTVAL (operands
[2]) == -1)
4980 operands
[3] = const0_rtx
, op
= ior_optab
;
4982 return 0; /* FAIL */
4984 else if (GET_CODE (operands
[3]) == CONST_INT
)
4987 if (INTVAL (operands
[3]) == 0)
4988 operands
[2] = constm1_rtx
, op
= and_optab
;
4989 else if (INTVAL (operands
[3]) == -1)
4990 operands
[2] = const0_rtx
, op
= ior_optab
;
4992 return 0; /* FAIL */
4995 return 0; /* FAIL */
4997 orig_out
= operands
[0];
4998 tmp
= gen_reg_rtx (GET_MODE (orig_out
));
5001 /* Recurse to get the constant loaded. */
5002 if (ix86_expand_int_movcc (operands
) == 0)
5003 return 0; /* FAIL */
5005 /* Mask in the interesting variable. */
5006 out
= expand_binop (GET_MODE (orig_out
), op
, var
, tmp
, orig_out
, 0,
5008 if (out
!= orig_out
)
5009 emit_move_insn (orig_out
, out
);
5011 return 1; /* DONE */
5015 * For comparison with above,
5025 if (! nonimmediate_operand (operands
[2], GET_MODE (operands
[0])))
5026 operands
[2] = force_reg (GET_MODE (operands
[0]), operands
[2]);
5027 if (! nonimmediate_operand (operands
[3], GET_MODE (operands
[0])))
5028 operands
[3] = force_reg (GET_MODE (operands
[0]), operands
[3]);
5030 emit_insn (compare_seq
);
5031 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
5032 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
5033 compare_op
, operands
[2],
5036 return 1; /* DONE */
5040 ix86_expand_fp_movcc (operands
)
5044 enum machine_mode mode
;
5047 /* The floating point conditional move instructions don't directly
5048 support conditions resulting from a signed integer comparison. */
5050 code
= GET_CODE (operands
[1]);
5057 tmp
= gen_reg_rtx (QImode
);
5058 ix86_expand_setcc (code
, 0, tmp
);
5060 ix86_compare_op0
= tmp
;
5061 ix86_compare_op1
= const0_rtx
;
5068 mode
= SELECT_CC_MODE (code
, ix86_compare_op0
, ix86_compare_op1
);
5069 emit_insn (gen_rtx_SET (VOIDmode
, gen_rtx_REG (mode
, FLAGS_REG
),
5070 gen_rtx_COMPARE (mode
,
5072 ix86_compare_op1
)));
5073 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
5074 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
5075 gen_rtx_fmt_ee (code
, VOIDmode
,
5076 gen_rtx_REG (mode
, FLAGS_REG
),
5084 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
5085 works for floating pointer parameters and nonoffsetable memories.
5086 For pushes, it returns just stack offsets; the values will be saved
5087 in the right order. Maximally three parts are generated. */
5090 ix86_split_to_parts (operand
, parts
, mode
)
5093 enum machine_mode mode
;
5095 int size
= GET_MODE_SIZE (mode
) / 4;
5097 if (size
< 2 || size
> 3)
5100 /* Optimize constant pool reference to immediates. This is used by fp moves,
5101 that force all constants to memory to allow combining. */
5103 if (GET_CODE (operand
) == MEM
5104 && GET_CODE (XEXP (operand
, 0)) == SYMBOL_REF
5105 && CONSTANT_POOL_ADDRESS_P (XEXP (operand
, 0)))
5106 operand
= get_pool_constant (XEXP (operand
, 0));
5108 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
5110 /* The only non-offsetable memories we handle are pushes. */
5111 if (! push_operand (operand
, VOIDmode
))
5114 PUT_MODE (operand
, SImode
);
5115 parts
[0] = parts
[1] = parts
[2] = operand
;
5120 split_di (&operand
, 1, &parts
[0], &parts
[1]);
5123 if (REG_P (operand
))
5125 if (!reload_completed
)
5127 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
5128 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
5130 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
5132 else if (offsettable_memref_p (operand
))
5134 PUT_MODE (operand
, SImode
);
5136 parts
[1] = adj_offsettable_operand (operand
, 4);
5138 parts
[2] = adj_offsettable_operand (operand
, 8);
5140 else if (GET_CODE (operand
) == CONST_DOUBLE
)
5145 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
5149 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
5150 parts
[2] = GEN_INT (l
[2]);
5153 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
5158 parts
[1] = GEN_INT (l
[1]);
5159 parts
[0] = GEN_INT (l
[0]);
5169 /* Emit insns to perform a move or push of DI, DF, and XF values.
5170 Return false when normal moves are needed; true when all required
5171 insns have been emitted. Operands 2-4 contain the input values
5172 int the correct order; operands 5-7 contain the output values. */
5175 ix86_split_long_move (operands1
)
5180 int size
= GET_MODE_SIZE (GET_MODE (operands1
[0])) / 4;
5184 /* Make our own copy to avoid clobbering the operands. */
5185 operands
[0] = copy_rtx (operands1
[0]);
5186 operands
[1] = copy_rtx (operands1
[1]);
5188 if (size
< 2 || size
> 3)
5191 /* The only non-offsettable memory we handle is push. */
5192 if (push_operand (operands
[0], VOIDmode
))
5194 else if (GET_CODE (operands
[0]) == MEM
5195 && ! offsettable_memref_p (operands
[0]))
5198 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands1
[0]));
5199 ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands1
[0]));
5201 /* When emitting push, take care for source operands on the stack. */
5202 if (push
&& GET_CODE (operands
[1]) == MEM
5203 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
5206 part
[1][1] = part
[1][2];
5207 part
[1][0] = part
[1][1];
5210 /* We need to do copy in the right order in case an address register
5211 of the source overlaps the destination. */
5212 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
5214 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
5216 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
5219 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
5222 /* Collision in the middle part can be handled by reordering. */
5223 if (collisions
== 1 && size
== 3
5224 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
5227 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
5228 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
5231 /* If there are more collisions, we can't handle it by reordering.
5232 Do an lea to the last part and use only one colliding move. */
5233 else if (collisions
> 1)
5236 emit_insn (gen_rtx_SET (VOIDmode
, part
[0][size
- 1],
5237 XEXP (part
[1][0], 0)));
5238 part
[1][0] = change_address (part
[1][0], SImode
, part
[0][size
- 1]);
5239 part
[1][1] = adj_offsettable_operand (part
[1][0], 4);
5241 part
[1][2] = adj_offsettable_operand (part
[1][0], 8);
5248 emit_insn (gen_push (part
[1][2]));
5249 emit_insn (gen_push (part
[1][1]));
5250 emit_insn (gen_push (part
[1][0]));
5254 /* Choose correct order to not overwrite the source before it is copied. */
5255 if ((REG_P (part
[0][0])
5256 && REG_P (part
[1][1])
5257 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
5259 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
5261 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
5265 operands1
[2] = part
[0][2];
5266 operands1
[3] = part
[0][1];
5267 operands1
[4] = part
[0][0];
5268 operands1
[5] = part
[1][2];
5269 operands1
[6] = part
[1][1];
5270 operands1
[7] = part
[1][0];
5274 operands1
[2] = part
[0][1];
5275 operands1
[3] = part
[0][0];
5276 operands1
[5] = part
[1][1];
5277 operands1
[6] = part
[1][0];
5284 operands1
[2] = part
[0][0];
5285 operands1
[3] = part
[0][1];
5286 operands1
[4] = part
[0][2];
5287 operands1
[5] = part
[1][0];
5288 operands1
[6] = part
[1][1];
5289 operands1
[7] = part
[1][2];
5293 operands1
[2] = part
[0][0];
5294 operands1
[3] = part
[0][1];
5295 operands1
[5] = part
[1][0];
5296 operands1
[6] = part
[1][1];
5304 ix86_split_ashldi (operands
, scratch
)
5305 rtx
*operands
, scratch
;
5307 rtx low
[2], high
[2];
5310 if (GET_CODE (operands
[2]) == CONST_INT
)
5312 split_di (operands
, 2, low
, high
);
5313 count
= INTVAL (operands
[2]) & 63;
5317 emit_move_insn (high
[0], low
[1]);
5318 emit_move_insn (low
[0], const0_rtx
);
5321 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
5325 if (!rtx_equal_p (operands
[0], operands
[1]))
5326 emit_move_insn (operands
[0], operands
[1]);
5327 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
5328 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
5333 if (!rtx_equal_p (operands
[0], operands
[1]))
5334 emit_move_insn (operands
[0], operands
[1]);
5336 split_di (operands
, 1, low
, high
);
5338 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
5339 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
5341 if (TARGET_CMOVE
&& (! reload_completed
|| scratch
))
5343 if (! reload_completed
)
5344 scratch
= force_reg (SImode
, const0_rtx
);
5346 emit_move_insn (scratch
, const0_rtx
);
5348 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
5352 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
5357 ix86_split_ashrdi (operands
, scratch
)
5358 rtx
*operands
, scratch
;
5360 rtx low
[2], high
[2];
5363 if (GET_CODE (operands
[2]) == CONST_INT
)
5365 split_di (operands
, 2, low
, high
);
5366 count
= INTVAL (operands
[2]) & 63;
5370 emit_move_insn (low
[0], high
[1]);
5372 if (! reload_completed
)
5373 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
5376 emit_move_insn (high
[0], low
[0]);
5377 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
5381 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
5385 if (!rtx_equal_p (operands
[0], operands
[1]))
5386 emit_move_insn (operands
[0], operands
[1]);
5387 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
5388 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
5393 if (!rtx_equal_p (operands
[0], operands
[1]))
5394 emit_move_insn (operands
[0], operands
[1]);
5396 split_di (operands
, 1, low
, high
);
5398 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
5399 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
5401 if (TARGET_CMOVE
&& (!reload_completed
|| scratch
))
5403 if (! reload_completed
)
5404 scratch
= gen_reg_rtx (SImode
);
5405 emit_move_insn (scratch
, high
[0]);
5406 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
5407 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
5411 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
5416 ix86_split_lshrdi (operands
, scratch
)
5417 rtx
*operands
, scratch
;
5419 rtx low
[2], high
[2];
5422 if (GET_CODE (operands
[2]) == CONST_INT
)
5424 split_di (operands
, 2, low
, high
);
5425 count
= INTVAL (operands
[2]) & 63;
5429 emit_move_insn (low
[0], high
[1]);
5430 emit_move_insn (high
[0], const0_rtx
);
5433 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
5437 if (!rtx_equal_p (operands
[0], operands
[1]))
5438 emit_move_insn (operands
[0], operands
[1]);
5439 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
5440 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
5445 if (!rtx_equal_p (operands
[0], operands
[1]))
5446 emit_move_insn (operands
[0], operands
[1]);
5448 split_di (operands
, 1, low
, high
);
5450 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
5451 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
5453 /* Heh. By reversing the arguments, we can reuse this pattern. */
5454 if (TARGET_CMOVE
&& (! reload_completed
|| scratch
))
5456 if (! reload_completed
)
5457 scratch
= force_reg (SImode
, const0_rtx
);
5459 emit_move_insn (scratch
, const0_rtx
);
5461 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
5465 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
5469 /* Expand the appropriate insns for doing strlen if not just doing
5472 out = result, initialized with the start address
5473 align_rtx = alignment of the address.
5474 scratch = scratch register, initialized with the startaddress when
5475 not aligned, otherwise undefined
5477 This is just the body. It needs the initialisations mentioned above and
5478 some address computing at the end. These things are done in i386.md. */
/* NOTE(review): extraction has dropped interior lines of this function
   (declarations of TMP/MEM/ALIGN, several braces and label emissions);
   comments below annotate only what is visible.  */
5481 ix86_expand_strlensi_unroll_1 (out
, align_rtx
, scratch
)
5482 rtx out
, align_rtx
, scratch
;
/* Labels for the byte-at-a-time alignment prologue and the exit.  */
5486 rtx align_2_label
= NULL_RTX
;
5487 rtx align_3_label
= NULL_RTX
;
5488 rtx align_4_label
= gen_label_rtx ();
5489 rtx end_0_label
= gen_label_rtx ();
/* Condition-code register used for the emitted compares.  */
5491 rtx flags
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
5492 rtx tmpreg
= gen_reg_rtx (SImode
);
/* Extract a compile-time alignment when one is known.  */
5495 if (GET_CODE (align_rtx
) == CONST_INT
)
5496 align
= INTVAL (align_rtx
);
5498 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
5500 /* Is there a known alignment and is it less than 4? */
5503 /* Is there a known alignment and is it not 2? */
5506 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
5507 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
5509 /* Leave just the 3 lower bits. */
5510 align_rtx
= expand_binop (SImode
, and_optab
, scratch
, GEN_INT (3),
5511 NULL_RTX
, 0, OPTAB_WIDEN
);
/* Dispatch on (addr & 3): three conditional jumps built by hand
   as SET (pc, IF_THEN_ELSE ...) so the CC register is explicit.  */
5513 emit_insn (gen_cmpsi_0 (align_rtx
, const0_rtx
));
5515 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
5516 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
5517 gen_rtx_LABEL_REF (VOIDmode
,
5520 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
/* (addr & 3) == 2 -> two bytes remain to alignment.  */
5522 emit_insn (gen_cmpsi_1 (align_rtx
, GEN_INT (2)));
5524 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
5525 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
5526 gen_rtx_LABEL_REF (VOIDmode
,
5529 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
/* (addr & 3) > 2 -> three-byte misalignment case.  */
5531 tmp
= gen_rtx_GTU (VOIDmode
, flags
, const0_rtx
);
5532 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
5533 gen_rtx_LABEL_REF (VOIDmode
,
5536 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
5540 /* Since the alignment is 2, we have to check 2 or 0 bytes;
5541 check if is aligned to 4 - byte. */
5543 align_rtx
= expand_binop (SImode
, and_optab
, scratch
, GEN_INT (2),
5544 NULL_RTX
, 0, OPTAB_WIDEN
);
5546 emit_insn (gen_cmpsi_0 (align_rtx
, const0_rtx
));
5548 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
5549 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
5550 gen_rtx_LABEL_REF (VOIDmode
,
5553 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
/* Byte-wide memory reference at the current OUT address.  */
5556 mem
= gen_rtx_MEM (QImode
, out
);
5558 /* Now compare the bytes. */
5560 /* Compare the first n unaligned byte on a byte per byte basis. */
5561 emit_insn (gen_cmpqi_0 (mem
, const0_rtx
));
/* Jump straight to the exit label when a NUL byte is found.  */
5563 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
5564 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
5565 gen_rtx_LABEL_REF (VOIDmode
, end_0_label
),
5567 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
5569 /* Increment the address. */
5570 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
5572 /* Not needed with an alignment of 2 */
5575 emit_label (align_2_label
);
5577 emit_insn (gen_cmpqi_0 (mem
, const0_rtx
));
5579 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
5580 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
5581 gen_rtx_LABEL_REF (VOIDmode
,
5584 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
5586 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
5588 emit_label (align_3_label
);
5591 emit_insn (gen_cmpqi_0 (mem
, const0_rtx
));
5593 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
5594 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
5595 gen_rtx_LABEL_REF (VOIDmode
, end_0_label
),
5597 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
5599 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
5602 /* Generate loop to check 4 bytes at a time. It is not a good idea to
5603 align this loop. It gives only huge programs, but does not help to
5605 emit_label (align_4_label
);
/* Word-wide load; OUT is advanced before the zero test so the loop
   back-edge needs no extra add.  */
5607 mem
= gen_rtx_MEM (SImode
, out
);
5608 emit_move_insn (scratch
, mem
);
5609 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
5611 /* This formula yields a nonzero result iff one of the bytes is zero.
5612 This saves three branches inside loop and many cycles. */
/* Classic trick: ((x - 0x01010101) & ~x & 0x80808080) != 0 iff some
   byte of x is zero.  */
5614 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
5615 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
5616 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
5617 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, GEN_INT (0x80808080)));
5618 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1, 0, align_4_label
);
/* TARGET_CMOVE path (presumably -- the guarding condition was lost in
   extraction): locate the zero byte branch-free using cmov.  */
5622 rtx reg
= gen_reg_rtx (SImode
);
5623 emit_move_insn (reg
, tmpreg
);
5624 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
5626 /* If zero is not in the first two bytes, move two bytes forward. */
5627 emit_insn (gen_testsi_1 (tmpreg
, GEN_INT (0x8080)));
5628 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
5629 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
5630 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
5631 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
5634 /* Emit lea manually to avoid clobbering of flags. */
5635 emit_insn (gen_rtx_SET (SImode
, reg
,
5636 gen_rtx_PLUS (SImode
, out
, GEN_INT (2))));
5638 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
5639 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
5640 emit_insn (gen_rtx_SET (VOIDmode
, out
,
5641 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
/* Non-cmov fallback: test and branch over the two-byte adjustment.  */
5648 rtx end_2_label
= gen_label_rtx ();
5649 /* Is zero in the first two bytes? */
5651 emit_insn (gen_testsi_1 (tmpreg
, GEN_INT (0x8080)));
5652 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
5653 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
5654 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
5655 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
5657 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
5658 JUMP_LABEL (tmp
) = end_2_label
;
5660 /* Not in the first two. Move two bytes forward. */
5661 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
5662 emit_insn (gen_addsi3 (out
, out
, GEN_INT (2)));
5664 emit_label (end_2_label
);
5668 /* Avoid branch in fixing the byte. */
/* addqi3_cc shifts the 0x80 flag bit into the carry; subsi3_carry then
   subtracts 3 or 4, undoing the over-advance of OUT without a branch.  */
5669 tmpreg
= gen_lowpart (QImode
, tmpreg
);
5670 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
5671 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3)));
5673 emit_label (end_0_label
);
5676 /* Clear stack slot assignments remembered from previous functions.
5677 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocates a fresh machine_function record (freed by GC elsewhere --
   TODO confirm ownership) and clears the per-mode stack-local cache.  */
5681 ix86_init_machine_status (p
)
5684 enum machine_mode mode
;
/* Attach a zero-initialized machine_function to the function P.  */
5687 = (struct machine_function
*) xmalloc (sizeof (struct machine_function
));
/* Walk every machine mode x slot and reset the cached stack slot.  */
5689 for (mode
= VOIDmode
; (int) mode
< (int) MAX_MACHINE_MODE
;
5690 mode
= (enum machine_mode
) ((int) mode
+ 1))
5691 for (n
= 0; n
< MAX_386_STACK_LOCALS
; n
++)
5692 ix86_stack_locals
[(int) mode
][n
] = NULL_RTX
;
5695 /* Mark machine specific bits of P for GC. */
/* GC root-marking hook: pins every cached stack-local MEM so the
   garbage collector does not reclaim it.  */
5697 ix86_mark_machine_status (p
)
5700 enum machine_mode mode
;
/* Mirror of the iteration in ix86_init_machine_status.  */
5703 for (mode
= VOIDmode
; (int) mode
< (int) MAX_MACHINE_MODE
;
5704 mode
= (enum machine_mode
) ((int) mode
+ 1))
5705 for (n
= 0; n
< MAX_386_STACK_LOCALS
; n
++)
5706 ggc_mark_rtx (p
->machine
->stack_locals
[(int) mode
][n
]);
5709 /* Return a MEM corresponding to a stack slot with mode MODE.
5710 Allocate a new slot if necessary.
5712 The RTL for a function can have several slots available: N is
5713 which slot to use. */
5716 assign_386_stack_local (mode
, n
)
5717 enum machine_mode mode
;
/* Bounds-check the slot index (aborts out of range -- the abort call
   itself was lost in extraction; confirm against the full source).  */
5720 if (n
< 0 || n
>= MAX_386_STACK_LOCALS
)
/* Lazily create the slot on first use; it is then cached per
   (mode, n) for the rest of the function.  */
5723 if (ix86_stack_locals
[(int) mode
][n
] == NULL_RTX
)
5724 ix86_stack_locals
[(int) mode
][n
]
5725 = assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
5727 return ix86_stack_locals
[(int) mode
][n
];
5730 /* Calculate the length of the memory address in the instruction
5731 encoding. Does not include the one-byte modrm, opcode, or prefix. */
5734 memory_address_length (addr
)
5737 struct ix86_address parts
;
5738 rtx base
, index
, disp
;
/* Push/pop style addresses encode no extra address bytes.  */
5741 if (GET_CODE (addr
) == PRE_DEC
5742 || GET_CODE (addr
) == POST_INC
)
/* Split ADDR into base/index/disp; failure aborts (call dropped by
   extraction -- confirm).  */
5745 if (! ix86_decompose_address (addr
, &parts
))
5749 index
= parts
.index
;
5753 /* Register Indirect. */
5754 if (base
&& !index
&& !disp
)
5756 /* Special cases: ebp and esp need the two-byte modrm form. */
5757 if (addr
== stack_pointer_rtx
5758 || addr
== arg_pointer_rtx
5759 || addr
== frame_pointer_rtx
5760 || addr
== hard_frame_pointer_rtx
)
5764 /* Direct Addressing. */
5765 else if (disp
&& !base
&& !index
)
5770 /* Find the length of the displacement constant. */
/* 'K' means the constant fits in a signed 8-bit displacement.  */
5773 if (GET_CODE (disp
) == CONST_INT
5774 && CONST_OK_FOR_LETTER_P (INTVAL (disp
), 'K'))
5780 /* An index requires the two-byte modrm form. */
/* Compute the default encoded length (in bytes) of INSN by summing the
   immediate, address, opcode and prefix contributions per insn type.
   (Many switch/case and accumulation lines were dropped by extraction;
   comments annotate only what is visible.)  */
5789 ix86_attr_length_default (insn
)
5792 enum attr_type type
;
5795 type
= get_attr_type (insn
);
5796 extract_insn (insn
);
/* Scan operands for an immediate constant contribution.  */
5827 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
5828 if (CONSTANT_P (recog_data
.operand
[i
]))
/* 'K'-class CONST_INTs fit in a one-byte (sign-extended) immediate.  */
5830 if (GET_CODE (recog_data
.operand
[i
]) == CONST_INT
5831 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data
.operand
[i
]), 'K'))
/* Otherwise the immediate occupies a full operand-mode-sized field.  */
5834 len
+= GET_MODE_SIZE (GET_MODE (recog_data
.operand
[0]));
5839 if (CONSTANT_P (recog_data
.operand
[1]))
5840 len
+= GET_MODE_SIZE (GET_MODE (recog_data
.operand
[0]))
;
/* Direct calls/jumps carry a full immediate displacement.  */
5844 if (constant_call_address_operand (recog_data
.operand
[0],
5845 GET_MODE (recog_data
.operand
[0])))
5850 if (constant_call_address_operand (recog_data
.operand
[1],
5851 GET_MODE (recog_data
.operand
[1])))
5857 /* Irritatingly, single_set doesn't work with REG_UNUSED present,
5858 as we'll get from running life_analysis during reg-stack when
5859 not optimizing. Not that it matters anyway, now that
5860 pro_epilogue_adjust_stack uses lea, and is by design not
5862 rtx set
= PATTERN (insn
);
5863 if (GET_CODE (set
) == SET
)
5865 else if (GET_CODE (set
) == PARALLEL
5866 && GET_CODE (XVECEXP (set
, 0, 0)) == SET
)
5867 set
= XVECEXP (set
, 0, 0);
/* LEA's "memory" operand is really an address computation; its length
   is that of the address encoding.  */
5871 len
+= memory_address_length (SET_SRC (set
));
/* fxch-style stack ops: NOTE(review): by precedence this parses as
   REGNO (...) != (FIRST_STACK_REG + 1), i.e. a comparison against
   %st(1) yielding 0/1 added to 2 -- appears intentional, but verify
   against the unmangled source.  */
5880 if (STACK_TOP_P (recog_data
.operand
[0]))
5881 return 2 + (REGNO (recog_data
.operand
[1]) != FIRST_STACK_REG
+ 1);
5883 return 2 + (REGNO (recog_data
.operand
[0]) != FIRST_STACK_REG
+ 1);
/* Generic case: add the address length of any MEM operand.  */
5889 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
5890 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
5892 len
+= memory_address_length (XEXP (recog_data
.operand
[i
], 0));
/* Finally add the opcode and prefix byte counts from the md attrs.  */
5897 len
+= get_attr_length_opcode (insn
);
5898 len
+= get_attr_length_prefix (insn
);
5903 /* Return the maximum number of instructions a cpu can issue. */
5910 case PROCESSOR_PENTIUM
:
5914 case PROCESSOR_PENTIUMPRO
:
5922 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
5923 by DEP_INSN and nothing set by DEP_INSN. */
5926 ix86_flags_dependant (insn
, dep_insn
, insn_type
)
5928 enum attr_type insn_type
;
5932 /* Simplify the test for uninteresting insns. */
/* Only flag consumers (setcc, cmov, fcmov, conditional branch) matter.  */
5933 if (insn_type
!= TYPE_SETCC
5934 && insn_type
!= TYPE_ICMOV
5935 && insn_type
!= TYPE_FCMOV
5936 && insn_type
!= TYPE_IBR
)
/* Single-SET producer: its destination is the only thing written.  */
5939 if ((set
= single_set (dep_insn
)) != 0)
5941 set
= SET_DEST (set
);
/* Two-SET PARALLEL producer: collect both destinations.  */
5944 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
5945 && XVECLEN (PATTERN (dep_insn
), 0) == 2
5946 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
5947 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
5949 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
/* NOTE(review): BUG -- set2 reads PARALLEL element 0 again; the guard
   above checked element 1, so this line was almost certainly meant to
   use XVECEXP (..., 0, 1).  As written, set2 duplicates set and the
   second SET's destination is never checked.  Fix upstream.  */
5950 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
/* The producer must set (at least) the flags register.  */
5955 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
5958 /* This test is true if the dependant insn reads the flags but
5959 not any other potentially set register. */
5960 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
5963 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
5969 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
5970 address with operands set by DEP_INSN. */
5973 ix86_agi_dependant (insn
, dep_insn
, insn_type
)
5975 enum attr_type insn_type
;
/* LEA does no real memory access but still computes an address; pull
   the address expression out of its SET pattern.  */
5979 if (insn_type
== TYPE_LEA
)
5981 addr
= PATTERN (insn
);
5982 if (GET_CODE (addr
) == SET
)
5984 else if (GET_CODE (addr
) == PARALLEL
5985 && GET_CODE (XVECEXP (addr
, 0, 0)) == SET
)
5986 addr
= XVECEXP (addr
, 0, 0);
5989 addr
= SET_SRC (addr
);
/* Otherwise find the first MEM operand and take its address.  */
5994 extract_insn (insn
);
5995 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
5996 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
5998 addr
= XEXP (recog_data
.operand
[i
], 0);
/* AGI exists iff DEP_INSN writes any register used in the address.  */
6005 return modified_in_p (addr
, dep_insn
);
/* Scheduler hook: adjust the latency COST of the dependence LINK between
   DEP_INSN (producer) and INSN (consumer) for the target CPU.
   (The surrounding switch on ix86_cpu and the cost adjustments inside
   each branch were partially dropped by extraction.)  */
6009 ix86_adjust_cost (insn
, link
, dep_insn
, cost
)
6010 rtx insn
, link
, dep_insn
;
6013 enum attr_type insn_type
, dep_insn_type
;
6015 int dep_insn_code_number
;
6017 /* Anti and output depenancies have zero cost on all CPUs. */
6018 if (REG_NOTE_KIND (link
) != 0)
6021 dep_insn_code_number
= recog_memoized (dep_insn
);
6023 /* If we can't recognize the insns, we can't really do anything. */
6024 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
6027 insn_type
= get_attr_type (insn
);
6028 dep_insn_type
= get_attr_type (dep_insn
);
6030 /* Prologue and epilogue allocators can have a false dependency on ebp.
6031 This results in one cycle extra stall on Pentium prologue scheduling,
6032 so handle this important case manually. */
6033 if (dep_insn_code_number
== CODE_FOR_pro_epilogue_adjust_stack
6034 && dep_insn_type
== TYPE_ALU
6035 && !reg_mentioned_p (stack_pointer_rtx
, insn
))
6040 case PROCESSOR_PENTIUM
:
6041 /* Address Generation Interlock adds a cycle of latency. */
6042 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
6045 /* ??? Compares pair with jump/setcc. */
6046 if (ix86_flags_dependant (insn
, dep_insn
, insn_type
))
6049 /* Floating point stores require value to be ready one cycle ealier. */
6050 if (insn_type
== TYPE_FMOV
6051 && get_attr_memory (insn
) == MEMORY_STORE
6052 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
6056 case PROCESSOR_PENTIUMPRO
:
6057 /* Since we can't represent delayed latencies of load+operation,
6058 increase the cost here for non-imov insns. */
6059 if (dep_insn_type
!= TYPE_IMOV
6060 && dep_insn_type
!= TYPE_FMOV
6061 && get_attr_memory (dep_insn
) == MEMORY_LOAD
)
6064 /* INT->FP conversion is expensive. */
6065 if (get_attr_fp_int_src (dep_insn
))
6068 /* There is one cycle extra latency between an FP op and a store. */
6069 if (insn_type
== TYPE_FMOV
6070 && (set
= single_set (dep_insn
)) != NULL_RTX
6071 && (set2
= single_set (insn
)) != NULL_RTX
6072 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
6073 && GET_CODE (SET_DEST (set2
)) == MEM
)
/* K6 branch (presumably -- the case label was lost in extraction).  */
6078 /* The esp dependency is resolved before the instruction is really
6080 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
6081 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
6084 /* Since we can't represent delayed latencies of load+operation,
6085 increase the cost here for non-imov insns. */
6086 if (get_attr_memory (dep_insn
) == MEMORY_LOAD
)
6087 cost
+= (dep_insn_type
!= TYPE_IMOV
) ? 2 : 1;
6089 /* INT->FP conversion is expensive. */
6090 if (get_attr_fp_int_src (dep_insn
))
6094 case PROCESSOR_ATHLON
:
6095 /* Address Generation Interlock cause problems on the Athlon CPU because
6096 the loads and stores are done in order so once one load or store has
6097 to wait, others must too, so penalize the AGIs slightly by one cycle.
6098 We might experiment with this value later. */
6099 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
6102 /* Since we can't represent delayed latencies of load+operation,
6103 increase the cost here for non-imov insns. */
6104 if (dep_insn_type
!= TYPE_IMOV
6105 && dep_insn_type
!= TYPE_FMOV
6106 && get_attr_memory (dep_insn
) == MEMORY_LOAD
)
6117 struct ppro_sched_data
6120 int issued_this_cycle
;
/* Return the encoded length of INSN, or a conservative default when the
   insn is unrecognizable (else-branch lost in extraction).  */
6125 ix86_safe_length (insn
)
6128 if (recog_memoized (insn
) >= 0)
6129 return get_attr_length(insn
);
/* Return the prefix-byte count of INSN when recognizable.
   NOTE(review): this returns get_attr_length, not get_attr_length_prefix
   -- given the function's name and its use in ix86_pent_find_pair
   (length <= 7 + length_prefix), this looks like a copy-paste bug from
   ix86_safe_length above; confirm against the md attributes and fix.  */
6135 ix86_safe_length_prefix (insn
)
6138 if (recog_memoized (insn
) >= 0)
6139 return get_attr_length(insn
);
/* Return INSN's memory attribute, or MEMORY_UNKNOWN for insns the
   recognizer cannot classify.  */
6144 static enum attr_memory
6145 ix86_safe_memory (insn
)
6148 if (recog_memoized (insn
) >= 0)
6149 return get_attr_memory(insn
);
6151 return MEMORY_UNKNOWN
;
/* Return INSN's Pentium pairability class, defaulting to non-pairable
   (PENT_PAIR_NP) for unrecognizable insns.  */
6154 static enum attr_pent_pair
6155 ix86_safe_pent_pair (insn
)
6158 if (recog_memoized (insn
) >= 0)
6159 return get_attr_pent_pair(insn
);
6161 return PENT_PAIR_NP
;
/* Return INSN's PPro uop-count class, defaulting to the conservative
   PPRO_UOPS_MANY for unrecognizable insns.  */
6164 static enum attr_ppro_uops
6165 ix86_safe_ppro_uops (insn
)
6168 if (recog_memoized (insn
) >= 0)
6169 return get_attr_ppro_uops (insn
);
6171 return PPRO_UOPS_MANY
;
/* Debug helper: print the UIDs of the insns currently occupying the
   three PPro decoder slots to DUMP (scheduler verbose file).  */
6175 ix86_dump_ppro_packet (dump
)
6178 if (ix86_sched_data
.ppro
.decode
[0])
6180 fprintf (dump
, "PPRO packet: %d",
6181 INSN_UID (ix86_sched_data
.ppro
.decode
[0]));
6182 if (ix86_sched_data
.ppro
.decode
[1])
6183 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[1]));
6184 if (ix86_sched_data
.ppro
.decode
[2])
6185 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[2]));
6190 /* We're beginning a new block. Initialize data structures as necessary. */
/* Zero the whole per-block scheduling state (decoder slots, issue
   counters) before scheduling a new basic block.  */
6193 ix86_sched_init (dump
, sched_verbose
)
6194 FILE *dump ATTRIBUTE_UNUSED
;
6195 int sched_verbose ATTRIBUTE_UNUSED
;
6197 memset (&ix86_sched_data
, 0, sizeof (ix86_sched_data
));
6200 /* Shift INSN to SLOT, and shift everything else down. */
/* Rotate the ready-queue entry at INSNP up to SLOT; intermediate
   entries each move down one position.  (The insn save/restore around
   this loop was dropped by extraction.)  */
6203 ix86_reorder_insn (insnp
, slot
)
6210 insnp
[0] = insnp
[1];
6211 while (++insnp
!= slot
);
6216 /* Find an instruction with given pairability and minimal amount of cycles
6217 lost by the fact that the CPU waits for both pipelines to finish before
6218 reading next instructions. Also take care that both instructions together
6219 can not exceed 7 bytes. */
/* Scans READY..E_READY (inclusive) for the best insn of pairability
   TYPE to pair with FIRST on the Pentium U/V pipes; returns a pointer
   into the ready array, or NULL when none qualifies.  */
6222 ix86_pent_find_pair (e_ready
, ready
, type
, first
)
6225 enum attr_pent_pair type
;
6228 int mincycles
, cycles
;
6229 enum attr_pent_pair tmp
;
6230 enum attr_memory memory
;
6231 rtx
*insnp
, *bestinsnp
= NULL
;
/* A first insn longer than 7 bytes (sans prefixes) cannot pair at all.
   NOTE(review): ix86_safe_length_prefix currently returns the full
   length (see note on that function), which skews this bound.  */
6233 if (ix86_safe_length (first
) > 7 + ix86_safe_length_prefix (first
))
6236 memory
= ix86_safe_memory (first
);
6237 cycles
= result_ready_cost (first
);
6238 mincycles
= INT_MAX
;
/* Walk backwards through the ready list; stop early once a perfect
   (zero-cycle-loss) candidate has driven mincycles to 0.  */
6240 for (insnp
= e_ready
; insnp
>= ready
&& mincycles
; --insnp
)
6241 if ((tmp
= ix86_safe_pent_pair (*insnp
)) == type
6242 && ix86_safe_length (*insnp
) <= 7 + ix86_safe_length_prefix (*insnp
))
6244 enum attr_memory second_memory
;
6245 int secondcycles
, currentcycles
;
6247 second_memory
= ix86_safe_memory (*insnp
);
6248 secondcycles
= result_ready_cost (*insnp
);
/* Cycles wasted = imbalance between the two pipes' latencies.  */
6249 currentcycles
= abs (cycles
- secondcycles
);
6251 if (secondcycles
>= 1 && cycles
>= 1)
6253 /* Two read/modify/write instructions together takes two
6255 if (memory
== MEMORY_BOTH
&& second_memory
== MEMORY_BOTH
)
6258 /* Read modify/write instruction followed by read/modify
6259 takes one cycle longer. */
6260 if (memory
== MEMORY_BOTH
&& second_memory
== MEMORY_LOAD
6261 && tmp
!= PENT_PAIR_UV
6262 && ix86_safe_pent_pair (first
) != PENT_PAIR_UV
)
/* Keep the candidate with the smallest cycle loss seen so far.  */
6265 if (currentcycles
< mincycles
)
6266 bestinsnp
= insnp
, mincycles
= currentcycles
;
6272 /* Subroutines of ix86_sched_reorder. */
/* Pentium-specific ready-list reordering: try to place a pairable
   partner for the head insn (*E_READY) so the U and V pipes both issue
   this cycle.  READY..E_READY is the ready list, head last.  */
6275 ix86_sched_reorder_pentium (ready
, e_ready
)
6279 enum attr_pent_pair pair1
, pair2
;
6282 /* This wouldn't be necessary if Haifa knew that static insn ordering
6283 is important to which pipe an insn is issued to. So we have to make
6284 some minor rearrangements. */
6286 pair1
= ix86_safe_pent_pair (*e_ready
);
6288 /* If the first insn is non-pairable, let it be. */
6289 if (pair1
== PENT_PAIR_NP
)
6292 pair2
= PENT_PAIR_NP
;
6295 /* If the first insn is UV or PV pairable, search for a PU
6297 if (pair1
== PENT_PAIR_UV
|| pair1
== PENT_PAIR_PV
)
6299 insnp
= ix86_pent_find_pair (e_ready
-1, ready
,
6300 PENT_PAIR_PU
, *e_ready
);
6302 pair2
= PENT_PAIR_PU
;
6305 /* If the first insn is PU or UV pairable, search for a PV
6307 if (pair2
== PENT_PAIR_NP
6308 && (pair1
== PENT_PAIR_PU
|| pair1
== PENT_PAIR_UV
))
6310 insnp
= ix86_pent_find_pair (e_ready
-1, ready
,
6311 PENT_PAIR_PV
, *e_ready
);
6313 pair2
= PENT_PAIR_PV
;
6316 /* If the first insn is pairable, search for a UV
6318 if (pair2
== PENT_PAIR_NP
)
6320 insnp
= ix86_pent_find_pair (e_ready
-1, ready
,
6321 PENT_PAIR_UV
, *e_ready
);
6323 pair2
= PENT_PAIR_UV
;
/* No partner found anywhere in the ready list: nothing to do.  */
6326 if (pair2
== PENT_PAIR_NP
)
6329 /* Found something! Decide if we need to swap the order. */
/* The V-pipe-only insn must issue second; for two UV insns, put the
   load before the read/modify/write to avoid the extra pairing stall.  */
6330 if (pair1
== PENT_PAIR_PV
|| pair2
== PENT_PAIR_PU
6331 || (pair1
== PENT_PAIR_UV
&& pair2
== PENT_PAIR_UV
6332 && ix86_safe_memory (*e_ready
) == MEMORY_BOTH
6333 && ix86_safe_memory (*insnp
) == MEMORY_LOAD
))
6334 ix86_reorder_insn (insnp
, e_ready
);
/* Otherwise slot the partner directly behind the head insn.  */
6336 ix86_reorder_insn (insnp
, e_ready
- 1);
/* PPro-specific ready-list reordering: model the 4-1-1 decoder template
   by picking one multi-uop insn for decoder 0 and single-uop insns for
   decoders 1 and 2, moving each chosen insn to the head of the queue.  */
6340 ix86_sched_reorder_ppro (ready
, e_ready
)
6345 enum attr_ppro_uops cur_uops
;
6346 int issued_this_cycle
;
6350 /* At this point .ppro.decode contains the state of the three
6351 decoders from last "cycle". That is, those insns that were
6352 actually independent. But here we're scheduling for the
6353 decoder, and we may find things that are decodable in the
/* Work on a local copy so the committed state is only updated by
   ix86_variable_issue.  */
6356 memcpy (decode
, ix86_sched_data
.ppro
.decode
, sizeof(decode
));
6357 issued_this_cycle
= 0;
6360 cur_uops
= ix86_safe_ppro_uops (*insnp
);
6362 /* If the decoders are empty, and we've a complex insn at the
6363 head of the priority queue, let it issue without complaint. */
6364 if (decode
[0] == NULL
)
6366 if (cur_uops
== PPRO_UOPS_MANY
)
6372 /* Otherwise, search for a 2-4 uop unsn to issue. */
6373 while (cur_uops
!= PPRO_UOPS_FEW
)
6377 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
6380 /* If so, move it to the head of the line. */
6381 if (cur_uops
== PPRO_UOPS_FEW
)
6382 ix86_reorder_insn (insnp
, e_ready
);
6384 /* Issue the head of the queue. */
6385 issued_this_cycle
= 1;
6386 decode
[0] = *e_ready
--;
6389 /* Look for simple insns to fill in the other two slots. */
6390 for (i
= 1; i
< 3; ++i
)
6391 if (decode
[i
] == NULL
)
/* Ready list exhausted -- stop filling slots.  */
6393 if (ready
>= e_ready
)
6397 cur_uops
= ix86_safe_ppro_uops (*insnp
);
6398 while (cur_uops
!= PPRO_UOPS_ONE
)
6402 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
6405 /* Found one. Move it to the head of the queue and issue it. */
6406 if (cur_uops
== PPRO_UOPS_ONE
)
6408 ix86_reorder_insn (insnp
, e_ready
);
6409 decode
[i
] = *e_ready
--;
6410 issued_this_cycle
++;
6414 /* ??? Didn't find one. Ideally, here we would do a lazy split
6415 of 2-uop insns, issue one and queue the other. */
/* Always report at least one issue so the scheduler makes progress.  */
6419 if (issued_this_cycle
== 0)
6420 issued_this_cycle
= 1;
6421 ix86_sched_data
.ppro
.issued_this_cycle
= issued_this_cycle
;
6425 /* We are about to being issuing insns for this clock cycle.
6426 Override the default sort algorithm to better slot instructions. */
/* Scheduler reorder hook: dispatch to the CPU-specific reordering of
   READY[0..N_READY-1] (head at the end), then return the issue rate.  */
6428 ix86_sched_reorder (dump
, sched_verbose
, ready
, n_ready
, clock_var
)
6429 FILE *dump ATTRIBUTE_UNUSED
;
6430 int sched_verbose ATTRIBUTE_UNUSED
;
6433 int clock_var ATTRIBUTE_UNUSED
;
/* Pointer to the highest-priority (last) ready insn.  */
6435 rtx
*e_ready
= ready
+ n_ready
- 1;
6445 case PROCESSOR_PENTIUM
:
6446 ix86_sched_reorder_pentium (ready
, e_ready
);
6449 case PROCESSOR_PENTIUMPRO
:
6450 ix86_sched_reorder_ppro (ready
, e_ready
);
/* Number of insns the scheduler may issue this cycle.  */
6455 return ix86_issue_rate ();
6458 /* We are about to issue INSN. Return the number of insns left on the
6459 ready queue that can be issued this cycle. */
6462 ix86_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
)
6472 return can_issue_more
- 1;
6474 case PROCESSOR_PENTIUMPRO
:
6476 enum attr_ppro_uops uops
= ix86_safe_ppro_uops (insn
);
6478 if (uops
== PPRO_UOPS_MANY
)
6481 ix86_dump_ppro_packet (dump
);
6482 ix86_sched_data
.ppro
.decode
[0] = insn
;
6483 ix86_sched_data
.ppro
.decode
[1] = NULL
;
6484 ix86_sched_data
.ppro
.decode
[2] = NULL
;
6486 ix86_dump_ppro_packet (dump
);
6487 ix86_sched_data
.ppro
.decode
[0] = NULL
;
6489 else if (uops
== PPRO_UOPS_FEW
)
6492 ix86_dump_ppro_packet (dump
);
6493 ix86_sched_data
.ppro
.decode
[0] = insn
;
6494 ix86_sched_data
.ppro
.decode
[1] = NULL
;
6495 ix86_sched_data
.ppro
.decode
[2] = NULL
;
6499 for (i
= 0; i
< 3; ++i
)
6500 if (ix86_sched_data
.ppro
.decode
[i
] == NULL
)
6502 ix86_sched_data
.ppro
.decode
[i
] = insn
;
6510 ix86_dump_ppro_packet (dump
);
6511 ix86_sched_data
.ppro
.decode
[0] = NULL
;
6512 ix86_sched_data
.ppro
.decode
[1] = NULL
;
6513 ix86_sched_data
.ppro
.decode
[2] = NULL
;
6517 return --ix86_sched_data
.ppro
.issued_this_cycle
;