/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000
   Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include <setjmp.h>
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#ifdef EXTRA_CONSTRAINT
/* If EXTRA_CONSTRAINT is defined, then the 'S'
   constraint in REG_CLASS_FROM_LETTER will no longer work, and various
   asm statements that need 'S' for class SIREG will break.  */
 error EXTRA_CONSTRAINT conflicts with S constraint letter
/* The previous line used to be #error, but some compilers barf
   even if the conditional was untrue.  */
#endif

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  6,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  23,					/* cost of a divide/mod */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8}				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
};
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  12,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  40,					/* cost of a divide/mod */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8}				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
};
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  11,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  25,					/* cost of a divide/mod */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6}				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
};
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  1,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  17,					/* cost of a divide/mod */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6}				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
};
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  18,					/* cost of a divide/mod */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4}				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
};
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  5,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  19,					/* cost of a divide/mod */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4}				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
};
struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON /* m_386 | m_PPRO | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
const int x86_use_any_reg = m_486;
const int x86_cmove = m_PPRO | m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386;
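
/* A sketch of how these bitmasks are consumed (the real macros live in
   i386.h and may differ in detail): each TARGET_* feature test masks
   the table above with the bit of the CPU being tuned for, roughly

	#define TARGET_USE_LEAVE (x86_use_leave & (1 << ix86_cpu))

   so TARGET_USE_LEAVE holds when tuning for the 386, K6 or Athlon,
   matching the x86_use_leave initializer above.  */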
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))

const char * const hi_reg_name[] = HI_REGISTER_NAMES;
const char * const qi_reg_name[] = QI_REGISTER_NAMES;
const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS
};
/* The "default" register map.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1,			/* arg, flags, fpsr, dir */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/

int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1,			/* arg, flags, fpsr, dir */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 2

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
};

#define ix86_stack_locals (cfun->machine->stack_locals)
/* which cpu are we scheduling for */
enum processor_type ix86_cpu;

/* which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */

/* Register allocation order */
const char *ix86_reg_alloc_order;
static char regs_allocated[FIRST_PSEUDO_REGISTER];

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
int ix86_align_funcs;
const char *ix86_align_funcs_string;

/* Power of two alignment for loops.  */
int ix86_align_loops;

/* Power of two alignment for non-loop jumps.  */
int ix86_align_jumps;
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static enum rtx_code unsigned_comparison PARAMS ((enum rtx_code code));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, int));
static rtx ix86_expand_compare PARAMS ((enum rtx_code, int));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					  rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static HOST_WIDE_INT ix86_compute_frame_size PARAMS ((HOST_WIDE_INT,
						      int *, int *, int *));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
static void ix86_emit_epilogue_esp_adjustment PARAMS ((int));
static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
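
/* A worked example of the decomposition (hypothetical operand, shown
   only for illustration): the address %ebx + %eax*4 + 16, i.e.

	(plus:SI (mult:SI (reg:SI %eax) (const_int 4))
		 (plus:SI (reg:SI %ebx) (const_int 16)))

   would fill a struct ix86_address with base = %ebx, index = %eax,
   scale = 4 and disp = (const_int 16).  Any of the fields may be
   absent for simpler addresses.  */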
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      struct processor_costs *cost;	/* Processor costs */
      int target_enable;		/* Target flags to enable.  */
      int target_disable;		/* Target flags to disable.  */
      int align_loop;			/* Default alignments.  */
      int align_jump;
      int align_func;
      int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 2, 2, 2, 1},
      {&i486_cost, 0, 0, 4, 4, 4, 1},
      {&pentium_cost, 0, 0, -4, -4, -4, 1},
      {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
      {&k6_cost, 0, 0, -5, -5, 4, 1},
      {&athlon_cost, 0, 0, 4, -4, 4, 1}
    };

  static struct pta
    {
      const char *name;		/* processor name or nickname.  */
      enum processor_type processor;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386},
      {"i486", PROCESSOR_I486},
      {"i586", PROCESSOR_PENTIUM},
      {"pentium", PROCESSOR_PENTIUM},
      {"i686", PROCESSOR_PENTIUMPRO},
      {"pentiumpro", PROCESSOR_PENTIUMPRO},
      {"k6", PROCESSOR_K6},
      {"athlon", PROCESSOR_ATHLON},
    };

  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  ix86_arch = PROCESSOR_I386;
  ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;

  if (ix86_arch_string != 0)
    {
      int i;

      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
	  {
	    ix86_arch = processor_alias_table[i].processor;
	    /* Default cpu tuning to the architecture.  */
	    ix86_cpu = ix86_arch;
	    break;
	  }

      if (i == pta_size)
	error ("bad value (%s) for -march= switch", ix86_arch_string);
    }

  if (ix86_cpu_string != 0)
    {
      int i;

      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
	  {
	    ix86_cpu = processor_alias_table[i].processor;
	    break;
	  }

      if (i == pta_size)
	error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
    }

  ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;
  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;

  /* Validate registers in register allocation order.  */
  if (ix86_reg_alloc_order)
    {
      int i, ch;

      for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a':	regno = 0;	break;
	    case 'd':	regno = 1;	break;
	    case 'c':	regno = 2;	break;
	    case 'b':	regno = 3;	break;
	    case 'S':	regno = 4;	break;
	    case 'D':	regno = 5;	break;
	    case 'B':	regno = 6;	break;

	    default:	fatal ("Register '%c' is unknown", ch);
	    }

	  if (regs_allocated[regno])
	    fatal ("Register '%c' already specified in allocation order", ch);

	  regs_allocated[regno] = 1;
	}
    }
  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      ix86_regparm = atoi (ix86_regparm_string);
      if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
	fatal ("-mregparm=%d is not between 0 and %d",
	       ix86_regparm, REGPARM_MAX);
    }

  /* Validate -malign-loops= value, or provide default.  */
  ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
  if (ix86_align_loops_string)
    {
      ix86_align_loops = atoi (ix86_align_loops_string);
      if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
	fatal ("-malign-loops=%d is not between 0 and %d",
	       ix86_align_loops, MAX_CODE_ALIGN);
    }

  /* Validate -malign-jumps= value, or provide default.  */
  ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
  if (ix86_align_jumps_string)
    {
      ix86_align_jumps = atoi (ix86_align_jumps_string);
      if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
	fatal ("-malign-jumps=%d is not between 0 and %d",
	       ix86_align_jumps, MAX_CODE_ALIGN);
    }

  /* Validate -malign-functions= value, or provide default.  */
  ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
  if (ix86_align_funcs_string)
    {
      ix86_align_funcs = atoi (ix86_align_funcs_string);
      if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
	fatal ("-malign-functions=%d is not between 0 and %d",
	       ix86_align_funcs, MAX_CODE_ALIGN);
    }
  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128.  */
  ix86_preferred_stack_boundary = 128;
  if (ix86_preferred_stack_boundary_string)
    {
      int i = atoi (ix86_preferred_stack_boundary_string);
      if (i < 2 || i > 31)
	fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
      ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
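
  /* For example, -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16 byte
     alignment that the SSE __m128 default above asks for.  */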
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      ix86_branch_cost = atoi (ix86_branch_cost_string);
      if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
	fatal ("-mbranch-cost=%d is not between 0 and 5",
	       ix86_branch_cost);
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_fast_math)
    target_flags &= ~MASK_IEEE_FP;

  /* If we're planning on using `loop', use it.  */
  if (TARGET_USE_LOOP && optimize)
    flag_branch_on_count_reg = 1;
}
/* A C statement (sans semicolon) to choose the order in which to
   allocate hard registers for pseudo-registers local to a basic
   block.

   Store the desired register order in the array `reg_alloc_order'.
   Element 0 should be the register to allocate first; element 1, the
   next register; and so on.

   The macro body should not assume anything about the contents of
   `reg_alloc_order' before execution of the macro.

   On most machines, it is not necessary to define this macro.  */

void
order_regs_for_local_alloc ()
{
  int i, ch, order;

  /* User specified the register allocation order.  */

  if (ix86_reg_alloc_order)
    {
      for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a':	regno = 0;	break;
	    case 'd':	regno = 1;	break;
	    case 'c':	regno = 2;	break;
	    case 'b':	regno = 3;	break;
	    case 'S':	regno = 4;	break;
	    case 'D':	regno = 5;	break;
	    case 'B':	regno = 6;	break;
	    }

	  reg_alloc_order[order++] = regno;
	}

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	{
	  if (! regs_allocated[i])
	    reg_alloc_order[order++] = i;
	}
    }

  /* If user did not specify a register allocation order, use natural order.  */
  else
    {
      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	reg_alloc_order[i] = i;
    }
}
void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
}
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for DECL.  The attributes in ATTRIBUTES have previously been
   assigned to DECL.  */

int
ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
     tree decl ATTRIBUTE_UNUSED;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier ATTRIBUTE_UNUSED;
     tree args ATTRIBUTE_UNUSED;
{
  return 0;
}
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for TYPE.  The attributes in ATTRIBUTES have previously been
   assigned to TYPE.  */

int
ix86_valid_type_attribute_p (type, attributes, identifier, args)
     tree type;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier;
     tree args;
{
  if (TREE_CODE (type) != FUNCTION_TYPE
      && TREE_CODE (type) != METHOD_TYPE
      && TREE_CODE (type) != FIELD_DECL
      && TREE_CODE (type) != TYPE_DECL)
    return 0;

  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  if (is_attribute_p ("stdcall", identifier))
    return (args == NULL_TREE);

  /* Cdecl attribute says the callee is a normal C declaration.  */
  if (is_attribute_p ("cdecl", identifier))
    return (args == NULL_TREE);

  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  if (is_attribute_p ("regparm", identifier))
    {
      tree cst;

      if (! args || TREE_CODE (args) != TREE_LIST
	  || TREE_CHAIN (args) != NULL_TREE
	  || TREE_VALUE (args) == NULL_TREE)
	return 0;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	return 0;

      if (TREE_INT_CST_HIGH (cst) != 0
	  || TREE_INT_CST_LOW (cst) < 0
	  || TREE_INT_CST_LOW (cst) > REGPARM_MAX)
	return 0;

      return 1;
    }

  return 0;
}
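
/* Example of a declaration the regparm check above accepts
   (hypothetical user code, shown for illustration):

	int f (int a, int b) __attribute__ ((regparm (2)));

   Here ARGS is a one-element TREE_LIST whose value is the
   INTEGER_CST 2, which lies between 0 and REGPARM_MAX.  */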
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
    {
      /* Stdcall functions will pop the stack if not variable args.  */
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
	rtd = 1;

      if (rtd
	  && (TYPE_ARG_TYPES (funtype) == NULL_TREE
	      || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		  == void_type_node)))
	return size;
    }

  /* Lose any fake structure return argument.  */
  if (aggregate_value_p (TREE_TYPE (funtype)))
    return GET_MODE_SIZE (Pmode);

  return 0;
}
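
/* For illustration: given a hypothetical declaration

	void f (int x) __attribute__ ((stdcall));

   the function above returns 4 for SIZE == 4, i.e. the callee pops
   its single word of arguments (`ret $4'), whereas a plain cdecl
   function returns 0 and leaves the popping to the caller.  */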
/* Argument support functions.  */

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (fntype)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers.  */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    cum->nregs = 0;
	}
    }

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* whether or not the argument was named */
{
  int bytes
    = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

  cum->words += words;
  cum->nregs -= words;
  cum->regno += words;

  if (cum->nregs <= 0)
    {
      cum->nregs = 0;
      cum->regno = 0;
    }

  return;
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

struct rtx_def *
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  int bytes
    = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  switch (mode)
    {
    /* For now, pass fp/complex values on the stack.  */
    default:
      break;

    case BLKmode:
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	ret = gen_rtx_REG (mode, cum->regno);
      break;
    }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	fprintf (stderr, ", reg=%%e%s", reg_names[REGNO (ret)]);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
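
/* For illustration: with regparm (2) in effect, a call f (a, b) with
   two int arguments has the first argument returned from here as
   (reg:SI 0) (%eax) and the second as (reg:SI 1) (%edx); a third
   argument would exhaust cum->nregs and be passed on the stack.  */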
/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && XINT (op, 1) >= 6
	      && XINT (op, 1) <= 7))
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != 7)
	return 0;

      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}
/* Return true if the operand contains a @GOT or @GOTOFF reference.  */

int
pic_symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == CONST)
    {
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
    }
  return 0;
}
/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);

  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}
/* Like call_insn_operand but allow (mem (symbol_ref ...)) even if pic.  */

int
expander_call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == SYMBOL_REF)
    return 1;

  return call_insn_operand (op, mode);
}

int
constant_call_address_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == MEM &&
    CONSTANT_ADDRESS_P (XEXP (op, 0)) &&
    GET_CODE (XEXP (op, 0)) != CONST_INT;
}
/* Match exactly zero and one.  */

int
const0_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return op == CONST0_RTX (mode);
}

int
const1_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return op == const1_rtx;
}

/* Match 2, 4, or 8.  Used for leal multiplicands.  */

int
const248_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
}
/* True if this is a constant appropriate for an increment or decrement.  */

int
incdec_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (op == const1_rtx || op == constm1_rtx)
    return 1;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
    return 1;
  if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
    return 1;
  if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
    return 1;
  return 0;
}
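
/* Note on the mode checks above: in a truncated mode the all-ones
   constant is -1, e.g. (const_int 0xffff) in HImode, so such masks
   are accepted as decrement operands just like constm1_rtx.  */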
/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is
   a register operand.

   This is used to prevent esp from being used as an index reg.
   Which would only happen in pathological cases.  */

int
reg_no_sp_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
    return 0;

  return register_operand (op, mode);
}
/* Return false if this is any eliminable register.  Otherwise
   general_operand.  */

int
general_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return general_operand (op, mode);
}
/* Return false if this is any eliminable register.  Otherwise
   register_operand or const_int.  */

int
nonmemory_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
}
/* Return true if op is a Q_REGS class register.  */

int
q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return QI_REG_P (op);
}

/* Return true if op is a NON_Q_REGS class register.  */

int
non_q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return NON_QI_REG_P (op);
}
/* Return 1 if OP is a comparison operator that can use the condition code
   generated by a logical operation, which characteristically does not set
   overflow or carry.  To be used with CCNOmode.  */

int
no_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && GET_RTX_CLASS (GET_CODE (op)) == '<'
	  && GET_CODE (op) != LE
	  && GET_CODE (op) != GT);
}

/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && GET_RTX_CLASS (GET_CODE (op)) == '<'
	  && GET_CODE (op) == unsigned_condition (GET_CODE (op)));
}
/* Return 1 if OP is a binary operator that can be promoted to wider mode.  */

int
promotable_binary_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case MULT:
      /* Modern CPUs have same latency for HImode and SImode multiply,
	 but 386 and 486 do HImode multiply faster.  */
      return ix86_cpu > PROCESSOR_I486;
    case PLUS:
    case AND:
    case IOR:
    case XOR:
    case ASHIFT:
      return 1;
    default:
      return 0;
    }
}
/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */

int
cmp_fp_expander_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (GET_CODE (op) == CONST_DOUBLE)
    return 1;
  return general_operand (op, mode);
}

/* Match an SI or HImode register for a zero_extract.  */

int
ext_register_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;
  return register_operand (op, VOIDmode);
}
/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}

int
mult_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == MULT;
}

int
div_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == DIV;
}

int
arith_or_logical_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
	      || GET_RTX_CLASS (GET_CODE (op)) == '2'));
}
/* Returns 1 if OP is a memory operand with a displacement.  */

int
memory_displacement_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}
/* To avoid problems when jump re-emits comparisons like testqi_ext_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (general_operand (op, mode))
    return 1;

  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}
/* Returns 1 if OP is a memory operand that cannot be represented by the
   modRM array.  */

int
long_memory_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}
/* Return nonzero if the rtx is known aligned.  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 4)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 4)
	return 0;
    }
  if (parts.disp)
    {
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE)
    return -1;

#if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
  {
    REAL_VALUE_TYPE d;
    jmp_buf handler;
    int is0, is1;

    if (setjmp (handler))
      return 0;

    set_float_handler (handler);
    REAL_VALUE_FROM_CONST_DOUBLE (d, x);
    is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
    is1 = REAL_VALUES_EQUAL (d, dconst1);
    set_float_handler (NULL_PTR);

    if (is0)
      return 1;

    if (is1)
      return 2;

    /* Note that on the 80387, other constants, such as pi,
       are much slower to load as standard constants
       than to load from doubles in memory!  */
    /* ??? Not true on K6: all constants are equal cost.  */
  }
#endif

  return 0;
}
/* Returns 1 if OP contains a symbol reference */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  HOST_WIDE_INT tsize;
  int nregs;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif
#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than a 32K pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  tsize = ix86_compute_frame_size (get_frame_size (), &nregs, NULL, NULL);
  return tsize == 0 && nregs == 0;
}
static char *pic_label_name;
static int pic_label_output;
static char *global_offset_table_name;

/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

void
asm_output_function_prefix (file, name)
     FILE *file;
     const char *name ATTRIBUTE_UNUSED;
{
  rtx xops[2];
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  xops[0] = pic_offset_table_rtx;
  xops[1] = stack_pointer_rtx;

  /* Deep branch prediction favors having a return for every call.  */
  if (pic_reg_used && TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (!pic_label_output)
	{
	  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
	     internal (non-global) label that's being emitted, it didn't make
	     sense to have .type information for local labels.  This caused
	     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
	     me debug info for a label that you're declaring non-global?), so
	     this was changed to call ASM_OUTPUT_LABEL() instead.  */

	  ASM_OUTPUT_LABEL (file, pic_label_name);

	  xops[1] = gen_rtx_MEM (SImode, xops[1]);
	  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
	  output_asm_insn ("ret", xops);

	  pic_label_output = 1;
	}
    }
}
void
load_pic_register ()
{
  rtx gotsym, pclab;

  if (global_offset_table_name == NULL)
    {
      global_offset_table_name =
	ggc_alloc_string ("_GLOBAL_OFFSET_TABLE_", 21);
      ggc_add_string_root (&global_offset_table_name, 1);
    }
  gotsym = gen_rtx_SYMBOL_REF (Pmode, global_offset_table_name);

  if (TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (pic_label_name == NULL)
	{
	  pic_label_name = ggc_alloc_string (NULL, 32);
	  ggc_add_string_root (&pic_label_name, 1);
	  ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
	}
      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
    }
  else
    {
      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
    }

  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));

  if (! TARGET_DEEP_BRANCH_PREDICTION)
    emit_insn (gen_popsi1 (pic_offset_table_rtx));

  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
}
/* Generate an SImode "push" pattern for input ARG.  */

static rtx
gen_push (arg)
     rtx arg;
{
  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (SImode,
				   gen_rtx_PRE_DEC (SImode,
						    stack_pointer_rtx)),
		      arg);
}
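
/* The pattern built above is, for a hypothetical ARG:

	(set (mem:SI (pre_dec:SI (reg:SI %esp)))
	     arg)

   i.e. the canonical RTL form of `pushl arg'.  */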
/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs ()
{
  int nregs = 0;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int limit = (frame_pointer_needed
	       ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
  int regno;

  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && ! call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      nregs++;
  return nregs;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;
     int to;
{
  int padding1;
  int nregs;

  /* Stack grows downward:

     [arguments]
						<- ARG_POINTER
     saved pc

     saved frame pointer if frame_pointer_needed
						<- HARD_FRAME_POINTER
     [saved regs]

     [padding1]   \
		   |			<- FRAME_POINTER
     [frame]	   > tsize
		   |
     [padding2]   /
   */

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    /* Skip saved PC and previous frame pointer.
       Executed only when frame_pointer_needed.  */
    return 8;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    {
      ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *) 0);
      padding1 += nregs * UNITS_PER_WORD;
      return -padding1;
    }
  else
    {
      /* ARG_POINTER or FRAME_POINTER to STACK_POINTER elimination.  */
      int frame_size = frame_pointer_needed ? 8 : 4;
      HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (),
						     &nregs, &padding1,
						     (int *) 0);

      if (to != STACK_POINTER_REGNUM)
	abort ();
      else if (from == ARG_POINTER_REGNUM)
	return tsize + nregs * UNITS_PER_WORD + frame_size;
      else if (from != FRAME_POINTER_REGNUM)
	abort ();
      else
	return tsize - padding1;
    }
}
/* Compute the size of local storage taking into consideration the
   desired stack alignment which is to be maintained.  Also determine
   the number of registers saved below the local storage.

   PADDING1 returns padding before stack frame and PADDING2 returns
   padding after stack frame.  */

static HOST_WIDE_INT
ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2)
     HOST_WIDE_INT size;
     int *nregs_on_stack;
     int *rpadding1;
     int *rpadding2;
{
  int nregs;
  int padding1 = 0;
  int padding2 = 0;
  HOST_WIDE_INT total_size;
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  int offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;

  nregs = ix86_nsaved_regs ();
  total_size = size;

  offset = frame_pointer_needed ? 8 : 4;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using
     those features, and they may break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (!size && stack_alignment_needed)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < 4)
    stack_alignment_needed = 4;

  offset += nregs * UNITS_PER_WORD;

  total_size += offset;

  /* Align start of frame for local function.  */
  padding1 = ((offset + stack_alignment_needed - 1)
	      & -stack_alignment_needed) - offset;

  total_size += padding1;

  /* Align stack boundary.  */
  padding2 = ((total_size + preferred_alignment - 1)
	      & -preferred_alignment) - total_size;

  if (nregs_on_stack)
    *nregs_on_stack = nregs;
  if (rpadding1)
    *rpadding1 = padding1;
  if (rpadding2)
    *rpadding2 = padding2;

  return size + padding1 + padding2;
}
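
/* A worked example (hypothetical numbers): with frame_pointer_needed,
   two saved registers, SIZE == 20, stack_alignment_needed == 4 and
   preferred_alignment == 16:

	offset   = 8 + 2 * UNITS_PER_WORD  = 16
	padding1 = ((16 + 3) & -4) - 16    = 0
	padding2 = ((36 + 15) & -16) - 36  = 12

   so the function returns 20 + 0 + 12 = 32 bytes of local storage.  */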
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs ()
{
  register int regno;
  int limit;
  rtx insn;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);

  limit = (frame_pointer_needed
	   ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);

  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && !call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue ()
{
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *) 0,
						 (int *) 0, (int *) 0);
  rtx insn;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  ix86_emit_save_regs ();

  if (tsize == 0)
    ;
  else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
    {
      if (frame_pointer_needed)
	insn = emit_insn (gen_pro_epilogue_adjust_stack
			  (stack_pointer_rtx, stack_pointer_rtx,
			   GEN_INT (-tsize), hard_frame_pointer_rtx));
      else
	insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				      GEN_INT (-tsize)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (tsize));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx));

      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    load_pic_register ();

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  However, if -fpic, the above call will have
     done that.  */
  if ((profile_flag || profile_block_flag) && ! pic_reg_used)
    emit_insn (gen_blockage ());
}
/* Emit code to add TSIZE to esp value.  Use POP instruction when
   profitable.  */

static void
ix86_emit_epilogue_esp_adjustment (tsize)
     int tsize;
{
  /* Intel's docs say that for 4 or 8 bytes of stack frame one should
     use `pop' and not `add'.  */
  int use_pop = tsize == 4;
  rtx edx = 0, ecx;

  /* Use two pops only for the Pentium processors.  */
  if (tsize == 8 && !TARGET_386 && !TARGET_486)
    {
      rtx retval = current_function_return_rtx;

      edx = gen_rtx_REG (SImode, 1);

      /* This case is a bit more complex.  Since we cannot pop into
	 %ecx twice we need a second register.  But this is only
	 available if the return value is not of DImode in which
	 case the %edx register is not available.  */
      use_pop = (retval == NULL
		 || !reg_overlap_mentioned_p (edx, retval));
    }

  if (use_pop)
    {
      ecx = gen_rtx_REG (SImode, 2);

      /* We have to prevent the two pops here from being scheduled.
	 GCC otherwise would try in some situation to put other
	 instructions in between them which has a bad effect.  */
      emit_insn (gen_blockage ());
      emit_insn (gen_popsi1 (ecx));
      if (tsize == 8)
	emit_insn (gen_popsi1 (edx));
    }
  else
    {
      /* If a frame pointer is present, we must be sure to tie the sp
	 to the fp so that we don't mis-schedule.  */
      if (frame_pointer_needed)
	emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						  stack_pointer_rtx,
						  GEN_INT (tsize),
						  hard_frame_pointer_rtx));
      else
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (tsize)));
    }
}
1970 /* Emit code to restore saved registers using MOV insns. First register
1971 is restored from POINTER + OFFSET. */
1972 static void
1973 ix86_emit_restore_regs_using_mov (pointer, offset)
1974 rtx pointer;
1975 int offset;
1977 int regno;
1978 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1979 || current_function_uses_const_pool);
1980 int limit = (frame_pointer_needed
1981 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
1983 for (regno = 0; regno < limit; regno++)
1984 if ((regs_ever_live[regno] && !call_used_regs[regno])
1985 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1987 emit_move_insn (gen_rtx_REG (SImode, regno),
1988 adj_offsettable_operand (gen_rtx_MEM (SImode,
1989 pointer),
1990 offset));
1991 offset += 4;
1995 /* Restore function stack, frame, and registers. */
1997 void
1998 ix86_expand_epilogue ()
2000 int nregs;
2001 int regno;
2003 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2004 || current_function_uses_const_pool);
2005 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
2006 HOST_WIDE_INT offset;
2007 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
2008 (int *)0, (int *)0);
2011 /* Calculate start of saved registers relative to ebp. */
2012 offset = -nregs * UNITS_PER_WORD;
2014 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
2015 if (profile_block_flag == 2)
2017 FUNCTION_BLOCK_PROFILER_EXIT;
2019 #endif
2021 /* If we're only restoring one register and sp is not valid then
2022 restore the register using a move instruction; it's less work
2023 than reloading sp and popping the register.
2025 The default code results in a stack adjustment using an add/lea
2026 instruction, while this code results in a LEAVE instruction (or
2027 discrete equivalent), so it is profitable in some other cases as
2028 well, especially when there are no registers to restore. We also
2029 use this code when TARGET_USE_LEAVE and there is exactly one
2030 register to pop. This heuristic may need some tuning in the future. */
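/* For example, a frame-pointer function with a nonzero local frame
   but no saved registers is finished with a single "leave" (or the
   mov/pop pair below when "leave" is slow), rather than an explicit
   stack adjustment followed by "popl %ebp". */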
2031 if ((!sp_valid && nregs <= 1)
2032 || (frame_pointer_needed && !nregs && tsize)
2033 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
2034 && nregs == 1))
2036 /* Restore registers. We can use ebp or esp to address the memory
2037 locations. If both are available, default to ebp, since offsets
2038 are known to be small. The only exception is esp pointing directly
2039 to the end of the block of saved registers, where we may simplify
2040 the addressing mode. */
2042 if (!frame_pointer_needed || (sp_valid && !tsize))
2043 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, tsize);
2044 else
2045 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);
2047 if (!frame_pointer_needed)
2048 ix86_emit_epilogue_esp_adjustment (tsize + nregs * UNITS_PER_WORD);
2049 /* If not an i386, mov & pop is faster than "leave". */
2050 else if (TARGET_USE_LEAVE || optimize_size)
2051 emit_insn (gen_leave ());
2052 else
2054 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2055 hard_frame_pointer_rtx,
2056 const0_rtx,
2057 hard_frame_pointer_rtx));
2058 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2061 else
2063 /* First step is to deallocate the stack frame so that we can
2064 pop the registers. */
2065 if (!sp_valid)
2067 if (!frame_pointer_needed)
2068 abort ();
2069 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2070 hard_frame_pointer_rtx,
2071 GEN_INT (offset),
2072 hard_frame_pointer_rtx));
2074 else if (tsize)
2075 ix86_emit_epilogue_esp_adjustment (tsize);
2077 for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
2078 if ((regs_ever_live[regno] && !call_used_regs[regno])
2079 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2080 emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
2083 if (current_function_pops_args && current_function_args_size)
2085 rtx popc = GEN_INT (current_function_pops_args);
2087 /* i386 can only pop 32K bytes (maybe 64K? Is it signed?). If
2088 asked to pop more, pop return address, do explicit add, and jump
2089 indirectly to the caller. */
2091 if (current_function_pops_args >= 32768)
2093 rtx ecx = gen_rtx_REG (SImode, 2);
2095 emit_insn (gen_popsi1 (ecx));
2096 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
2097 emit_indirect_jump (ecx);
2099 else
2100 emit_jump_insn (gen_return_pop_internal (popc));
2102 else
2103 emit_jump_insn (gen_return_internal ());
2106 /* Extract the parts of an RTL expression that is a valid memory address
2107 for an instruction. Return false if the structure of the address is
2108 grossly off. */
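/* For example, (plus (plus (mult (reg %ebx) (const_int 4)) (reg %eax))
   (const_int 12)) decomposes into base %eax, index %ebx, scale 4 and
   displacement 12, i.e. the AT&T operand "12(%eax,%ebx,4)". */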
2110 static int
2111 ix86_decompose_address (addr, out)
2112 register rtx addr;
2113 struct ix86_address *out;
2115 rtx base = NULL_RTX;
2116 rtx index = NULL_RTX;
2117 rtx disp = NULL_RTX;
2118 HOST_WIDE_INT scale = 1;
2119 rtx scale_rtx = NULL_RTX;
2121 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2122 base = addr;
2123 else if (GET_CODE (addr) == PLUS)
2125 rtx op0 = XEXP (addr, 0);
2126 rtx op1 = XEXP (addr, 1);
2127 enum rtx_code code0 = GET_CODE (op0);
2128 enum rtx_code code1 = GET_CODE (op1);
2130 if (code0 == REG || code0 == SUBREG)
2132 if (code1 == REG || code1 == SUBREG)
2133 index = op0, base = op1; /* index + base */
2134 else
2135 base = op0, disp = op1; /* base + displacement */
2137 else if (code0 == MULT)
2139 index = XEXP (op0, 0);
2140 scale_rtx = XEXP (op0, 1);
2141 if (code1 == REG || code1 == SUBREG)
2142 base = op1; /* index*scale + base */
2143 else
2144 disp = op1; /* index*scale + disp */
2146 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2148 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2149 scale_rtx = XEXP (XEXP (op0, 0), 1);
2150 base = XEXP (op0, 1);
2151 disp = op1;
2153 else if (code0 == PLUS)
2155 index = XEXP (op0, 0); /* index + base + disp */
2156 base = XEXP (op0, 1);
2157 disp = op1;
2159 else
2160 return FALSE;
2162 else if (GET_CODE (addr) == MULT)
2164 index = XEXP (addr, 0); /* index*scale */
2165 scale_rtx = XEXP (addr, 1);
2167 else if (GET_CODE (addr) == ASHIFT)
2169 rtx tmp;
2171 /* We're called for lea too, which implements ashift on occasion. */
2172 index = XEXP (addr, 0);
2173 tmp = XEXP (addr, 1);
2174 if (GET_CODE (tmp) != CONST_INT)
2175 return FALSE;
2176 scale = INTVAL (tmp);
2177 if ((unsigned HOST_WIDE_INT) scale > 3)
2178 return FALSE;
2179 scale = 1 << scale;
2181 else
2182 disp = addr; /* displacement */
2184 /* Extract the integral value of scale. */
2185 if (scale_rtx)
2187 if (GET_CODE (scale_rtx) != CONST_INT)
2188 return FALSE;
2189 scale = INTVAL (scale_rtx);
2192 /* Allow arg pointer and stack pointer as index if there is no scaling. */
2193 if (base && index && scale == 1
2194 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2195 || index == stack_pointer_rtx))
2197 rtx tmp = base;
2198 base = index;
2199 index = tmp;
2202 /* Special case: %ebp cannot be encoded as a base without a displacement. */
2203 if ((base == hard_frame_pointer_rtx
2204 || base == frame_pointer_rtx
2205 || base == arg_pointer_rtx) && !disp)
2206 disp = const0_rtx;
2208 /* Special case: on K6, [%esi] causes the instruction to be vector
2209 decoded. Avoid this by transforming to [%esi+0]. */
2210 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
2211 && base && !index && !disp
2212 && REG_P (base)
2213 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
2214 disp = const0_rtx;
2216 /* Special case: encode reg+reg instead of reg*2. */
2217 if (!base && index && scale && scale == 2)
2218 base = index, scale = 1;
2220 /* Special case: scaling cannot be encoded without base or displacement. */
2221 if (!base && !disp && index && scale != 1)
2222 disp = const0_rtx;
2224 out->base = base;
2225 out->index = index;
2226 out->disp = disp;
2227 out->scale = scale;
2229 return TRUE;
2232 /* Determine if a given CONST RTX is a valid memory displacement
2233 in PIC mode. */
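/* For example, (const (unspec [(symbol_ref "x")] 7)) -- an @GOTOFF
   reference -- is a valid PIC displacement, optionally wrapped in a
   (plus ... (const_int N)) for a constant offset; unspec 6 (@GOT) is
   accepted likewise. */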
2235 int
2236 legitimate_pic_address_disp_p (disp)
2237 register rtx disp;
2239 if (GET_CODE (disp) != CONST)
2240 return 0;
2241 disp = XEXP (disp, 0);
2243 if (GET_CODE (disp) == PLUS)
2245 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2246 return 0;
2247 disp = XEXP (disp, 0);
2250 if (GET_CODE (disp) != UNSPEC
2251 || XVECLEN (disp, 0) != 1)
2252 return 0;
2254 /* Must be @GOT or @GOTOFF. */
2255 if (XINT (disp, 1) != 6
2256 && XINT (disp, 1) != 7)
2257 return 0;
2259 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2260 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2261 return 0;
2263 return 1;
2266 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2267 memory address for an instruction. The MODE argument is the machine mode
2268 for the MEM expression that wants to use this address.
2270 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
2271 convert common non-canonical forms to canonical form so that they will
2272 be recognized. */
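/* For example, the address in "movl 12(%esp,%eax,4), %edx" is the
   canonical base + index*scale + disp form with scale 4; a scale of 3,
   or a base or index not in Pmode, is rejected below (with a
   diagnostic when TARGET_DEBUG_ADDR is set). */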
2274 int
2275 legitimate_address_p (mode, addr, strict)
2276 enum machine_mode mode;
2277 register rtx addr;
2278 int strict;
2280 struct ix86_address parts;
2281 rtx base, index, disp;
2282 HOST_WIDE_INT scale;
2283 const char *reason = NULL;
2284 rtx reason_rtx = NULL_RTX;
2286 if (TARGET_DEBUG_ADDR)
2288 fprintf (stderr,
2289 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
2290 GET_MODE_NAME (mode), strict);
2291 debug_rtx (addr);
2294 if (! ix86_decompose_address (addr, &parts))
2296 reason = "decomposition failed";
2297 goto error;
2300 base = parts.base;
2301 index = parts.index;
2302 disp = parts.disp;
2303 scale = parts.scale;
2305 /* Validate base register.
2307 Don't allow SUBREGs here; they can lead to spill failures when the
2308 base is one word out of a two-word structure, which is represented
2309 internally as a DImode int. */
2311 if (base)
2313 reason_rtx = base;
2315 if (GET_CODE (base) != REG)
2317 reason = "base is not a register";
2318 goto error;
2321 if (GET_MODE (base) != Pmode)
2323 reason = "base is not in Pmode";
2324 goto error;
2327 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
2328 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
2330 reason = "base is not valid";
2331 goto error;
2335 /* Validate index register.
2337 Don't allow SUBREGs here; they can lead to spill failures when the
2338 index is one word out of a two-word structure, which is represented
2339 internally as a DImode int. */
2341 if (index)
2343 reason_rtx = index;
2345 if (GET_CODE (index) != REG)
2347 reason = "index is not a register";
2348 goto error;
2351 if (GET_MODE (index) != Pmode)
2353 reason = "index is not in Pmode";
2354 goto error;
2357 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
2358 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
2360 reason = "index is not valid";
2361 goto error;
2365 /* Validate scale factor. */
2366 if (scale != 1)
2368 reason_rtx = GEN_INT (scale);
2369 if (!index)
2371 reason = "scale without index";
2372 goto error;
2375 if (scale != 2 && scale != 4 && scale != 8)
2377 reason = "scale is not a valid multiplier";
2378 goto error;
2382 /* Validate displacement. */
2383 if (disp)
2385 reason_rtx = disp;
2387 if (!CONSTANT_ADDRESS_P (disp))
2389 reason = "displacement is not constant";
2390 goto error;
2393 if (GET_CODE (disp) == CONST_DOUBLE)
2395 reason = "displacement is a const_double";
2396 goto error;
2399 if (flag_pic && SYMBOLIC_CONST (disp))
2401 if (! legitimate_pic_address_disp_p (disp))
2403 reason = "displacement is an invalid pic construct";
2404 goto error;
2407 /* Verify that a symbolic pic displacement includes
2408 the pic_offset_table_rtx register. */
2409 if (base != pic_offset_table_rtx
2410 && (index != pic_offset_table_rtx || scale != 1))
2412 reason = "pic displacement against invalid base";
2413 goto error;
2416 else if (HALF_PIC_P ())
2418 if (! HALF_PIC_ADDRESS_P (disp)
2419 || (base != NULL_RTX || index != NULL_RTX))
2421 reason = "displacement is an invalid half-pic reference";
2422 goto error;
2427 /* Everything looks valid. */
2428 if (TARGET_DEBUG_ADDR)
2429 fprintf (stderr, "Success.\n");
2430 return TRUE;
2432 error:
2433 if (TARGET_DEBUG_ADDR)
2435 fprintf (stderr, "Error: %s\n", reason);
2436 debug_rtx (reason_rtx);
2438 return FALSE;
2441 /* Return a legitimate reference for ORIG (an address) using the
2442 register REG. If REG is 0, a new pseudo is generated.
2444 There are two types of references that must be handled:
2446 1. Global data references must load the address from the GOT, via
2447 the PIC reg. An insn is emitted to do this load, and the reg is
2448 returned.
2450 2. Static data references, constant pool addresses, and code labels
2451 compute the address as an offset from the GOT, whose base is in
2452 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2453 differentiate them from global data objects. The returned
2454 address is the PIC reg + an unspec constant.
2456 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
2457 reg also appears in the address. */
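/* For example, a reference to a global symbol becomes a load from its
   GOT slot, (mem (plus pic_reg (const (unspec [symbol_ref] 6)))),
   while a static symbol becomes the direct sum (plus pic_reg
   (const (unspec [symbol_ref] 7))), an @GOTOFF offset. */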
2459 rtx
2460 legitimize_pic_address (orig, reg)
2461 rtx orig;
2462 rtx reg;
2464 rtx addr = orig;
2465 rtx new = orig;
2466 rtx base;
2468 if (GET_CODE (addr) == LABEL_REF
2469 || (GET_CODE (addr) == SYMBOL_REF
2470 && (CONSTANT_POOL_ADDRESS_P (addr)
2471 || SYMBOL_REF_FLAG (addr))))
2473 /* This symbol may be referenced via a displacement from the PIC
2474 base address (@GOTOFF). */
2476 current_function_uses_pic_offset_table = 1;
2477 new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 7);
2478 new = gen_rtx_CONST (VOIDmode, new);
2479 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2481 if (reg != 0)
2483 emit_move_insn (reg, new);
2484 new = reg;
2487 else if (GET_CODE (addr) == SYMBOL_REF)
2489 /* This symbol must be referenced via a load from the
2490 Global Offset Table (@GOT). */
2492 current_function_uses_pic_offset_table = 1;
2493 new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 6);
2494 new = gen_rtx_CONST (VOIDmode, new);
2495 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2496 new = gen_rtx_MEM (Pmode, new);
2497 RTX_UNCHANGING_P (new) = 1;
2499 if (reg == 0)
2500 reg = gen_reg_rtx (Pmode);
2501 emit_move_insn (reg, new);
2502 new = reg;
2504 else
2506 if (GET_CODE (addr) == CONST)
2508 addr = XEXP (addr, 0);
2509 if (GET_CODE (addr) == UNSPEC)
2511 /* Check that the unspec is one of the ones we generate? */
2513 else if (GET_CODE (addr) != PLUS)
2514 abort ();
2516 if (GET_CODE (addr) == PLUS)
2518 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
2520 /* Check first to see if this is a constant offset from a @GOTOFF
2521 symbol reference. */
2522 if ((GET_CODE (op0) == LABEL_REF
2523 || (GET_CODE (op0) == SYMBOL_REF
2524 && (CONSTANT_POOL_ADDRESS_P (op0)
2525 || SYMBOL_REF_FLAG (op0))))
2526 && GET_CODE (op1) == CONST_INT)
2528 current_function_uses_pic_offset_table = 1;
2529 new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, op0), 7);
2530 new = gen_rtx_PLUS (VOIDmode, new, op1);
2531 new = gen_rtx_CONST (VOIDmode, new);
2532 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2534 if (reg != 0)
2536 emit_move_insn (reg, new);
2537 new = reg;
2540 else
2542 base = legitimize_pic_address (XEXP (addr, 0), reg);
2543 new = legitimize_pic_address (XEXP (addr, 1),
2544 base == reg ? NULL_RTX : reg);
2546 if (GET_CODE (new) == CONST_INT)
2547 new = plus_constant (base, INTVAL (new));
2548 else
2550 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
2552 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
2553 new = XEXP (new, 1);
2555 new = gen_rtx_PLUS (Pmode, base, new);
2560 return new;
2563 /* Try machine-dependent ways of modifying an illegitimate address
2564 to be legitimate. If we find one, return the new, valid address.
2565 This macro is used in only one place: `memory_address' in explow.c.
2567 OLDX is the address as it was before break_out_memory_refs was called.
2568 In some cases it is useful to look at this to decide what needs to be done.
2570 MODE and WIN are passed so that this macro can use
2571 GO_IF_LEGITIMATE_ADDRESS.
2573 It is always safe for this macro to do nothing. It exists to recognize
2574 opportunities to optimize the output.
2576 For the 80386, we handle X+REG by loading X into a register R and
2577 using R+REG. R will go in a general reg and indexing will be used.
2578 However, if REG is a broken-out memory address or multiplication,
2579 nothing needs to be done because REG can certainly go in a general reg.
2581 When -fpic is used, special handling is needed for symbolic references.
2582 See comments by legitimize_pic_address in i386.c for details. */
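/* For example, (plus (ashift (reg) (const_int 2)) (reg)) is first
   rewritten below into (plus (mult (reg) (const_int 4)) (reg)), which
   the scaled-index addressing modes can encode directly. */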
2584 rtx
2585 legitimize_address (x, oldx, mode)
2586 register rtx x;
2587 register rtx oldx ATTRIBUTE_UNUSED;
2588 enum machine_mode mode;
2590 int changed = 0;
2591 unsigned log;
2593 if (TARGET_DEBUG_ADDR)
2595 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2596 GET_MODE_NAME (mode));
2597 debug_rtx (x);
2600 if (flag_pic && SYMBOLIC_CONST (x))
2601 return legitimize_pic_address (x, 0);
2603 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
2604 if (GET_CODE (x) == ASHIFT
2605 && GET_CODE (XEXP (x, 1)) == CONST_INT
2606 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
2608 changed = 1;
2609 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
2610 GEN_INT (1 << log));
2613 if (GET_CODE (x) == PLUS)
2615 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2617 if (GET_CODE (XEXP (x, 0)) == ASHIFT
2618 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2619 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
2621 changed = 1;
2622 XEXP (x, 0) = gen_rtx_MULT (Pmode,
2623 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
2624 GEN_INT (1 << log));
2627 if (GET_CODE (XEXP (x, 1)) == ASHIFT
2628 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
2629 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
2631 changed = 1;
2632 XEXP (x, 1) = gen_rtx_MULT (Pmode,
2633 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
2634 GEN_INT (1 << log));
2637 /* Put multiply first if it isn't already. */
2638 if (GET_CODE (XEXP (x, 1)) == MULT)
2640 rtx tmp = XEXP (x, 0);
2641 XEXP (x, 0) = XEXP (x, 1);
2642 XEXP (x, 1) = tmp;
2643 changed = 1;
2646 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2647 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2648 created by virtual register instantiation, register elimination, and
2649 similar optimizations. */
2650 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
2652 changed = 1;
2653 x = gen_rtx_PLUS (Pmode,
2654 gen_rtx_PLUS (Pmode, XEXP (x, 0),
2655 XEXP (XEXP (x, 1), 0)),
2656 XEXP (XEXP (x, 1), 1));
2659 /* Canonicalize
2660 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
2661 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2662 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
2663 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
2664 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
2665 && CONSTANT_P (XEXP (x, 1)))
2667 rtx constant;
2668 rtx other = NULL_RTX;
2670 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2672 constant = XEXP (x, 1);
2673 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
2675 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
2677 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
2678 other = XEXP (x, 1);
2680 else
2681 constant = 0;
2683 if (constant)
2685 changed = 1;
2686 x = gen_rtx_PLUS (Pmode,
2687 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
2688 XEXP (XEXP (XEXP (x, 0), 1), 0)),
2689 plus_constant (other, INTVAL (constant)));
2693 if (changed && legitimate_address_p (mode, x, FALSE))
2694 return x;
2696 if (GET_CODE (XEXP (x, 0)) == MULT)
2698 changed = 1;
2699 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
2702 if (GET_CODE (XEXP (x, 1)) == MULT)
2704 changed = 1;
2705 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
2708 if (changed
2709 && GET_CODE (XEXP (x, 1)) == REG
2710 && GET_CODE (XEXP (x, 0)) == REG)
2711 return x;
2713 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
2715 changed = 1;
2716 x = legitimize_pic_address (x, 0);
2719 if (changed && legitimate_address_p (mode, x, FALSE))
2720 return x;
2722 if (GET_CODE (XEXP (x, 0)) == REG)
2724 register rtx temp = gen_reg_rtx (Pmode);
2725 register rtx val = force_operand (XEXP (x, 1), temp);
2726 if (val != temp)
2727 emit_move_insn (temp, val);
2729 XEXP (x, 1) = temp;
2730 return x;
2733 else if (GET_CODE (XEXP (x, 1)) == REG)
2735 register rtx temp = gen_reg_rtx (Pmode);
2736 register rtx val = force_operand (XEXP (x, 0), temp);
2737 if (val != temp)
2738 emit_move_insn (temp, val);
2740 XEXP (x, 0) = temp;
2741 return x;
2745 return x;
2748 /* Print an integer constant expression in assembler syntax. Addition
2749 and subtraction are the only arithmetic that may appear in these
2750 expressions. FILE is the stdio stream to write to, X is the rtx, and
2751 CODE is the operand print code from the output string. */
2753 static void
2754 output_pic_addr_const (file, x, code)
2755 FILE *file;
2756 rtx x;
2757 int code;
2759 char buf[256];
2761 switch (GET_CODE (x))
2763 case PC:
2764 if (flag_pic)
2765 putc ('.', file);
2766 else
2767 abort ();
2768 break;
2770 case SYMBOL_REF:
2771 assemble_name (file, XSTR (x, 0));
2772 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
2773 fputs ("@PLT", file);
2774 break;
2776 case LABEL_REF:
2777 x = XEXP (x, 0);
2778 /* FALLTHRU */
2779 case CODE_LABEL:
2780 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
2781 assemble_name (asm_out_file, buf);
2782 break;
2784 case CONST_INT:
2785 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2786 break;
2788 case CONST:
2789 /* This used to output parentheses around the expression,
2790 but that does not work on the 386 (either ATT or BSD assembler). */
2791 output_pic_addr_const (file, XEXP (x, 0), code);
2792 break;
2794 case CONST_DOUBLE:
2795 if (GET_MODE (x) == VOIDmode)
2797 /* We can use %d if the number is <32 bits and positive. */
2798 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
2799 fprintf (file, "0x%lx%08lx",
2800 (unsigned long) CONST_DOUBLE_HIGH (x),
2801 (unsigned long) CONST_DOUBLE_LOW (x));
2802 else
2803 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2805 else
2806 /* We can't handle floating point constants;
2807 PRINT_OPERAND must handle them. */
2808 output_operand_lossage ("floating constant misused");
2809 break;
2811 case PLUS:
2812 /* Some assemblers need integer constants to appear first. */
2813 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
2815 output_pic_addr_const (file, XEXP (x, 0), code);
2816 putc ('+', file);
2817 output_pic_addr_const (file, XEXP (x, 1), code);
2819 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2821 output_pic_addr_const (file, XEXP (x, 1), code);
2822 putc ('+', file);
2823 output_pic_addr_const (file, XEXP (x, 0), code);
2825 else
2826 abort ();
2827 break;
2829 case MINUS:
2830 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
2831 output_pic_addr_const (file, XEXP (x, 0), code);
2832 putc ('-', file);
2833 output_pic_addr_const (file, XEXP (x, 1), code);
2834 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
2835 break;
2837 case UNSPEC:
2838 if (XVECLEN (x, 0) != 1)
2839 abort ();
2840 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
2841 switch (XINT (x, 1))
2843 case 6:
2844 fputs ("@GOT", file);
2845 break;
2846 case 7:
2847 fputs ("@GOTOFF", file);
2848 break;
2849 case 8:
2850 fputs ("@PLT", file);
2851 break;
2852 default:
2853 output_operand_lossage ("invalid UNSPEC as operand");
2854 break;
2856 break;
2858 default:
2859 output_operand_lossage ("invalid expression as operand");
2863 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
2864 We need to handle our special PIC relocations. */
2866 void
2867 i386_dwarf_output_addr_const (file, x)
2868 FILE *file;
2869 rtx x;
2871 fprintf (file, "\t%s\t", INT_ASM_OP);
2872 if (flag_pic)
2873 output_pic_addr_const (file, x, '\0');
2874 else
2875 output_addr_const (file, x);
2876 fputc ('\n', file);
2879 /* In the name of slightly smaller debug output, and to cater to
2880 general assembler lossage, recognize PIC+GOTOFF and turn it back
2881 into a direct symbol reference. */
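/* For example, (plus pic_reg (const (unspec [symbol_ref "x"] 7)))
   simplifies back to (symbol_ref "x"), possibly with a constant
   offset re-attached. */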
2883 rtx
2884 i386_simplify_dwarf_addr (orig_x)
2885 rtx orig_x;
2887 rtx x = orig_x;
2889 if (GET_CODE (x) != PLUS
2890 || GET_CODE (XEXP (x, 0)) != REG
2891 || GET_CODE (XEXP (x, 1)) != CONST)
2892 return orig_x;
2894 x = XEXP (XEXP (x, 1), 0);
2895 if (GET_CODE (x) == UNSPEC
2896 && XINT (x, 1) == 7)
2897 return XVECEXP (x, 0, 0);
2899 if (GET_CODE (x) == PLUS
2900 && GET_CODE (XEXP (x, 0)) == UNSPEC
2901 && GET_CODE (XEXP (x, 1)) == CONST_INT
2902 && XINT (XEXP (x, 0), 1) == 7)
2903 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
2905 return orig_x;
2908 static void
2909 put_condition_code (code, mode, reverse, fp, file)
2910 enum rtx_code code;
2911 enum machine_mode mode;
2912 int reverse, fp;
2913 FILE *file;
2915 const char *suffix;
2917 if (reverse)
2918 code = reverse_condition (code);
2920 switch (code)
2922 case EQ:
2923 suffix = "e";
2924 break;
2925 case NE:
2926 suffix = "ne";
2927 break;
2928 case GT:
2929 if (mode == CCNOmode)
2930 abort ();
2931 suffix = "g";
2932 break;
2933 case GTU:
2934 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
2935 Those same assemblers have the same but opposite lossage on cmov. */
2936 suffix = fp ? "nbe" : "a";
2937 break;
2938 case LT:
2939 if (mode == CCNOmode)
2940 suffix = "s";
2941 else
2942 suffix = "l";
2943 break;
2944 case LTU:
2945 suffix = "b";
2946 break;
2947 case GE:
2948 if (mode == CCNOmode)
2949 suffix = "ns";
2950 else
2951 suffix = "ge";
2952 break;
2953 case GEU:
2954 /* ??? As above. */
2955 suffix = fp ? "nb" : "ae";
2956 break;
2957 case LE:
2958 if (mode == CCNOmode)
2959 abort ();
2960 suffix = "le";
2961 break;
2962 case LEU:
2963 suffix = "be";
2964 break;
2965 default:
2966 abort ();
2968 fputs (suffix, file);
2971 void
2972 print_reg (x, code, file)
2973 rtx x;
2974 int code;
2975 FILE *file;
2977 if (REGNO (x) == ARG_POINTER_REGNUM
2978 || REGNO (x) == FRAME_POINTER_REGNUM
2979 || REGNO (x) == FLAGS_REG
2980 || REGNO (x) == FPSR_REG)
2981 abort ();
2983 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
2984 putc ('%', file);
2986 if (code == 'w')
2987 code = 2;
2988 else if (code == 'b')
2989 code = 1;
2990 else if (code == 'k')
2991 code = 4;
2992 else if (code == 'y')
2993 code = 3;
2994 else if (code == 'h')
2995 code = 0;
2996 else
2997 code = GET_MODE_SIZE (GET_MODE (x));
2999 switch (code)
3001 case 3:
3002 if (STACK_TOP_P (x))
3004 fputs ("st(0)", file);
3005 break;
3007 /* FALLTHRU */
3008 case 4:
3009 case 8:
3010 case 12:
3011 if (! FP_REG_P (x))
3012 putc ('e', file);
3013 /* FALLTHRU */
3014 case 2:
3015 fputs (hi_reg_name[REGNO (x)], file);
3016 break;
3017 case 1:
3018 fputs (qi_reg_name[REGNO (x)], file);
3019 break;
3020 case 0:
3021 fputs (qi_high_reg_name[REGNO (x)], file);
3022 break;
3023 default:
3024 abort ();
3028 /* Meaning of CODE:
3029 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
3030 C -- print opcode suffix for set/cmov insn.
3031 c -- like C, but print reversed condition.
F -- like C, but print the opcode suffix for an fcmov insn.
f -- like F, but print reversed condition.
3032 R -- print the prefix for register names.
3033 z -- print the opcode suffix for the size of the current operand.
3034 * -- print a star (in certain assembler syntax)
3036 s -- print a shift double count, followed by the assembler's argument
3037 delimiter.
3038 b -- print the QImode name of the register for the indicated operand.
3039 %b0 would print %al if operands[0] is reg 0.
3040 w -- likewise, print the HImode name of the register.
3041 k -- likewise, print the SImode name of the register.
3042 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
3043 y -- print "st(0)" instead of "st" as a register. */
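/* For example, with operands[0] = (reg:HI 0), "%w0" prints "%ax" and
   "%k0" prints "%eax"; "%z1" applied to an SFmode memory operand
   prints the `s' suffix used by instructions such as flds. */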
3045 void
3046 print_operand (file, x, code)
3047 FILE *file;
3048 rtx x;
3049 int code;
3051 if (code)
3053 switch (code)
3055 case '*':
3056 if (ASSEMBLER_DIALECT == 0)
3057 putc ('*', file);
3058 return;
3060 case 'L':
3061 if (ASSEMBLER_DIALECT == 0)
3062 putc ('l', file);
3063 return;
3065 case 'W':
3066 if (ASSEMBLER_DIALECT == 0)
3067 putc ('w', file);
3068 return;
3070 case 'B':
3071 if (ASSEMBLER_DIALECT == 0)
3072 putc ('b', file);
3073 return;
3075 case 'Q':
3076 if (ASSEMBLER_DIALECT == 0)
3077 putc ('l', file);
3078 return;
3080 case 'S':
3081 if (ASSEMBLER_DIALECT == 0)
3082 putc ('s', file);
3083 return;
3085 case 'T':
3086 if (ASSEMBLER_DIALECT == 0)
3087 putc ('t', file);
3088 return;
3090 case 'z':
3091 /* 387 opcodes don't get size suffixes if the operands are
3092 registers. */
3094 if (STACK_REG_P (x))
3095 return;
3097 /* Intel syntax has no truck with instruction suffixes. */
3098 if (ASSEMBLER_DIALECT != 0)
3099 return;
3101 /* The suffix is derived from the size of the operand. */
3102 switch (GET_MODE_SIZE (GET_MODE (x)))
3104 case 1:
3105 putc ('b', file);
3106 return;
3108 case 2:
3109 putc ('w', file);
3110 return;
3112 case 4:
3113 if (GET_MODE (x) == SFmode)
3115 putc ('s', file);
3116 return;
3118 else
3119 putc ('l', file);
3120 return;
3122 case 12:
3123 putc ('t', file);
3124 return;
3126 case 8:
3127 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
3129 #ifdef GAS_MNEMONICS
3130 putc ('q', file);
3131 #else
3132 putc ('l', file);
3133 putc ('l', file);
3134 #endif
3136 else
3137 putc ('l', file);
3138 return;
3141 case 'b':
3142 case 'w':
3143 case 'k':
3144 case 'h':
3145 case 'y':
3146 case 'X':
3147 case 'P':
3148 break;
3150 case 's':
3151 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
3153 PRINT_OPERAND (file, x, 0);
3154 putc (',', file);
3156 return;
3158 case 'C':
3159 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
3160 return;
3161 case 'F':
3162 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
3163 return;
3165 /* Like above, but reverse condition */
3166 case 'c':
3167 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
3168 return;
3169 case 'f':
3170 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
3171 return;
3173 default:
3175 char str[50];
3176 sprintf (str, "invalid operand code `%c'", code);
3177 output_operand_lossage (str);
3182 if (GET_CODE (x) == REG)
3184 PRINT_REG (x, code, file);
3187 else if (GET_CODE (x) == MEM)
3189 /* No `byte ptr' prefix for call instructions. */
3190 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
3192 const char * size;
3193 switch (GET_MODE_SIZE (GET_MODE (x)))
3195 case 1: size = "BYTE"; break;
3196 case 2: size = "WORD"; break;
3197 case 4: size = "DWORD"; break;
3198 case 8: size = "QWORD"; break;
3199 case 12: size = "XWORD"; break;
3200 default:
3201 abort ();
3203 fputs (size, file);
3204 fputs (" PTR ", file);
3207 x = XEXP (x, 0);
3208 if (flag_pic && CONSTANT_ADDRESS_P (x))
3209 output_pic_addr_const (file, x, code);
3210 else
3211 output_address (x);
3214 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
3216 REAL_VALUE_TYPE r;
3217 long l;
3219 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3220 REAL_VALUE_TO_TARGET_SINGLE (r, l);
3222 if (ASSEMBLER_DIALECT == 0)
3223 putc ('$', file);
3224 fprintf (file, "0x%lx", l);
3227 /* These float cases don't actually occur as immediate operands. */
3228 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
3230 REAL_VALUE_TYPE r;
3231 char dstr[30];
3233 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3234 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3235 fprintf (file, "%s", dstr);
3238 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
3240 REAL_VALUE_TYPE r;
3241 char dstr[30];
3243 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3244 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3245 fprintf (file, "%s", dstr);
3247 else
3249 if (code != 'P')
3251 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
3253 if (ASSEMBLER_DIALECT == 0)
3254 putc ('$', file);
3256 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
3257 || GET_CODE (x) == LABEL_REF)
3259 if (ASSEMBLER_DIALECT == 0)
3260 putc ('$', file);
3261 else
3262 fputs ("OFFSET FLAT:", file);
3265 if (GET_CODE (x) == CONST_INT)
3266 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3267 else if (flag_pic)
3268 output_pic_addr_const (file, x, code);
3269 else
3270 output_addr_const (file, x);
3274 /* Print a memory operand whose address is ADDR. */
3276 void
3277 print_operand_address (file, addr)
3278 FILE *file;
3279 register rtx addr;
3281 struct ix86_address parts;
3282 rtx base, index, disp;
3283 int scale;
3285 if (! ix86_decompose_address (addr, &parts))
3286 abort ();
3288 base = parts.base;
3289 index = parts.index;
3290 disp = parts.disp;
3291 scale = parts.scale;
3293 if (!base && !index)
3295 /* Displacement only requires special attention. */
3297 if (GET_CODE (disp) == CONST_INT)
3299 if (ASSEMBLER_DIALECT != 0)
3300 fputs ("ds:", file);
3301 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
3303 else if (flag_pic)
3304 output_pic_addr_const (file, addr, 0);
3305 else
3306 output_addr_const (file, addr);
3308 else
3310 if (ASSEMBLER_DIALECT == 0)
3312 if (disp)
3314 if (flag_pic)
3315 output_pic_addr_const (file, disp, 0);
3316 else if (GET_CODE (disp) == LABEL_REF)
3317 output_asm_label (disp);
3318 else
3319 output_addr_const (file, disp);
3322 putc ('(', file);
3323 if (base)
3324 PRINT_REG (base, 0, file);
3325 if (index)
3327 putc (',', file);
3328 PRINT_REG (index, 0, file);
3329 if (scale != 1)
3330 fprintf (file, ",%d", scale);
3332 putc (')', file);
3334 else
3336 rtx offset = NULL_RTX;
3338 if (disp)
3340 /* Pull out the offset of a symbol; print any symbol itself. */
3341 if (GET_CODE (disp) == CONST
3342 && GET_CODE (XEXP (disp, 0)) == PLUS
3343 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3345 offset = XEXP (XEXP (disp, 0), 1);
3346 disp = gen_rtx_CONST (VOIDmode,
3347 XEXP (XEXP (disp, 0), 0));
3350 if (flag_pic)
3351 output_pic_addr_const (file, disp, 0);
3352 else if (GET_CODE (disp) == LABEL_REF)
3353 output_asm_label (disp);
3354 else if (GET_CODE (disp) == CONST_INT)
3355 offset = disp;
3356 else
3357 output_addr_const (file, disp);
3360 putc ('[', file);
3361 if (base)
3363 PRINT_REG (base, 0, file);
3364 if (offset)
3366 if (INTVAL (offset) >= 0)
3367 putc ('+', file);
3368 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3371 else if (offset)
3372 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3373 else
3374 putc ('0', file);
3376 if (index)
3378 putc ('+', file);
3379 PRINT_REG (index, 0, file);
3380 if (scale != 1)
3381 fprintf (file, "*%d", scale);
3383 putc (']', file);
3388 /* Split one or more DImode RTL references into pairs of SImode
3389 references. The RTL can be REG, offsettable MEM, integer constant, or
3390 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3391 split and "num" is its length. lo_half and hi_half are output arrays
3392 that parallel "operands". */
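/* For example, after reload (reg:DI 0) splits into lo (reg:SI 0) and
   hi (reg:SI 1), while an integer constant splits via split_double
   into its low and high 32-bit words. */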
3394 void
3395 split_di (operands, num, lo_half, hi_half)
3396 rtx operands[];
3397 int num;
3398 rtx lo_half[], hi_half[];
3400 while (num--)
3402 rtx op = operands[num];
3403 if (CONSTANT_P (op))
3404 split_double (op, &lo_half[num], &hi_half[num]);
3405 else if (! reload_completed)
3407 lo_half[num] = gen_lowpart (SImode, op);
3408 hi_half[num] = gen_highpart (SImode, op);
3410 else if (GET_CODE (op) == REG)
3412 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3413 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
3415 else if (offsettable_memref_p (op))
3417 rtx lo_addr = XEXP (op, 0);
3418 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3419 lo_half[num] = change_address (op, SImode, lo_addr);
3420 hi_half[num] = change_address (op, SImode, hi_addr);
3422 else
3423 abort ();
3427 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
3428 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3429 is the expression of the binary operation. The output may either be
3430 emitted here, or returned to the caller, like all output_* functions.
3432 There is no guarantee that the operands are the same mode, as they
3433 might be within FLOAT or FLOAT_EXTEND expressions. */
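/* For example, adding two stack registers when operands[2] dies
   selects the popping "faddp" form below, while a memory operand
   selects "fadd%z2" with an appropriate size suffix. */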
3435 const char *
3436 output_387_binary_op (insn, operands)
3437 rtx insn;
3438 rtx *operands;
3440 static char buf[100];
3441 rtx temp;
3442 const char *p;
3444 switch (GET_CODE (operands[3]))
3446 case PLUS:
3447 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3448 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3449 p = "fiadd";
3450 else
3451 p = "fadd";
3452 break;
3454 case MINUS:
3455 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3456 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3457 p = "fisub";
3458 else
3459 p = "fsub";
3460 break;
3462 case MULT:
3463 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3464 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3465 p = "fimul";
3466 else
3467 p = "fmul";
3468 break;
3470 case DIV:
3471 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3472 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3473 p = "fidiv";
3474 else
3475 p = "fdiv";
3476 break;
3478 default:
3479 abort ();
3482 strcpy (buf, p);
3484 switch (GET_CODE (operands[3]))
3486 case MULT:
3487 case PLUS:
3488 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
3490 temp = operands[2];
3491 operands[2] = operands[1];
3492 operands[1] = temp;
3495 if (GET_CODE (operands[2]) == MEM)
3497 p = "%z2\t%2";
3498 break;
3501 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3503 if (STACK_TOP_P (operands[0]))
3504 p = "p\t{%0,%2|%2, %0}";
3505 else
3506 p = "p\t{%2,%0|%0, %2}";
3507 break;
3510 if (STACK_TOP_P (operands[0]))
3511 p = "\t{%y2,%0|%0, %y2}";
3512 else
3513 p = "\t{%2,%0|%0, %2}";
3514 break;
3516 case MINUS:
3517 case DIV:
3518 if (GET_CODE (operands[1]) == MEM)
3520 p = "r%z1\t%1";
3521 break;
3524 if (GET_CODE (operands[2]) == MEM)
3526 p = "%z2\t%2";
3527 break;
3530 if (! STACK_REG_P (operands[1]) || ! STACK_REG_P (operands[2]))
3531 abort ();
3533 /* Note that the Unixware assembler, and the AT&T assembler before
3534 that, are confusingly not reversed from Intel syntax in this
3535 area. */
3536 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3538 if (STACK_TOP_P (operands[0]))
3539 p = "p\t%0,%2";
3540 else
3541 p = "rp\t%2,%0";
3542 break;
3545 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
3547 if (STACK_TOP_P (operands[0]))
3548 p = "rp\t%0,%1";
3549 else
3550 p = "p\t%1,%0";
3551 break;
3554 if (STACK_TOP_P (operands[0]))
3556 if (STACK_TOP_P (operands[1]))
3557 p = "\t%y2,%0";
3558 else
3559 p = "r\t%y1,%0";
3560 break;
3562 else if (STACK_TOP_P (operands[1]))
3563 p = "\t%1,%0";
3564 else
3565 p = "r\t%2,%0";
3566 break;
3568 default:
3569 abort ();
3572 strcat (buf, p);
3573 return buf;
3576 /* Output code for INSN to convert a float to a signed int. OPERANDS
3577 are the insn operands. The output may be [SD]Imode and the input
3578 operand may be [SDX]Fmode. */
3580 const char *
3581 output_fix_trunc (insn, operands)
3582 rtx insn;
3583 rtx *operands;
3585 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3586 int dimode_p = GET_MODE (operands[0]) == DImode;
3587 rtx xops[4];
3589 /* Jump through a hoop or two for DImode, since the hardware has no
3590 non-popping instruction. We used to do this a different way, but
3591 that was somewhat fragile and broke with post-reload splitters. */
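/* The control word fiddling below stores 12 (0b1100) into the high
   byte of a copy of the x87 control word; bits 2 and 3 of that byte
   are the rounding-control bits 10 and 11, so this selects round
   toward zero as C's truncating conversion requires. The original
   control word is reloaded afterwards. */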
3592 if (dimode_p && !stack_top_dies)
3593 output_asm_insn ("fld\t%y1", operands);
3595 if (! STACK_TOP_P (operands[1]))
3596 abort ();
3598 xops[0] = GEN_INT (12);
3599 xops[1] = adj_offsettable_operand (operands[2], 1);
3600 xops[1] = change_address (xops[1], QImode, NULL_RTX);
3602 xops[2] = operands[0];
3603 if (GET_CODE (operands[0]) != MEM)
3604 xops[2] = operands[3];
3606 output_asm_insn ("fnstcw\t%2", operands);
3607 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
3608 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
3609 output_asm_insn ("fldcw\t%2", operands);
3610 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
3612 if (stack_top_dies || dimode_p)
3613 output_asm_insn ("fistp%z2\t%2", xops);
3614 else
3615 output_asm_insn ("fist%z2\t%2", xops);
3617 output_asm_insn ("fldcw\t%2", operands);
3619 if (GET_CODE (operands[0]) != MEM)
3621 if (dimode_p)
3623 split_di (operands+0, 1, xops+0, xops+1);
3624 split_di (operands+3, 1, xops+2, xops+3);
3625 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3626 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
3628 else
3629 output_asm_insn ("mov{l}\t{%3,%0|%0, %3}", operands);
3632 return "";
3635 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
3636 should be used and 2 when fnstsw should be used. UNORDERED_P is true
3637 when fucom should be used. */
3639 const char *
3640 output_fp_compare (insn, operands, eflags_p, unordered_p)
3641 rtx insn;
3642 rtx *operands;
3643 int eflags_p, unordered_p;
3645 int stack_top_dies;
3646 rtx cmp_op0 = operands[0];
3647 rtx cmp_op1 = operands[1];
3649 if (eflags_p == 2)
3651 cmp_op0 = cmp_op1;
3652 cmp_op1 = operands[2];
3655 if (! STACK_TOP_P (cmp_op0))
3656 abort ();
3658 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3660 if (STACK_REG_P (cmp_op1)
3661 && stack_top_dies
3662 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
3663 && REGNO (cmp_op1) != FIRST_STACK_REG)
3665 /* If the top of the 387 stack dies, and the other operand is also
3666 a stack register that dies, then this must be an `fcompp' float
3667 compare. */
3669 if (eflags_p == 1)
3671 /* There is no double popping fcomi variant. Fortunately,
3672 eflags is immune from the fstp's cc clobbering. */
3673 if (unordered_p)
3674 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
3675 else
3676 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
3677 return "fstp\t%y0";
3679 else
3681 if (eflags_p == 2)
3683 if (unordered_p)
3684 return "fucompp\n\tfnstsw\t%0";
3685 else
3686 return "fcompp\n\tfnstsw\t%0";
3688 else
3690 if (unordered_p)
3691 return "fucompp";
3692 else
3693 return "fcompp";
3697 else
3699 /* Encoded here as eflags_p << 3 | intmode << 2 | unordered_p << 1 | stack_top_dies. */
3701 static const char * const alt[24] =
3703 "fcom%z1\t%y1",
3704 "fcomp%z1\t%y1",
3705 "fucom%z1\t%y1",
3706 "fucomp%z1\t%y1",
3708 "ficom%z1\t%y1",
3709 "ficomp%z1\t%y1",
3710 NULL,
3711 NULL,
3713 "fcomi\t{%y1, %0|%0, %y1}",
3714 "fcomip\t{%y1, %0|%0, %y1}",
3715 "fucomi\t{%y1, %0|%0, %y1}",
3716 "fucomip\t{%y1, %0|%0, %y1}",
3718 NULL,
3719 NULL,
3720 NULL,
3721 NULL,
3723 "fcom%z2\t%y2\n\tfnstsw\t%0",
3724 "fcomp%z2\t%y2\n\tfnstsw\t%0",
3725 "fucom%z2\t%y2\n\tfnstsw\t%0",
3726 "fucomp%z2\t%y2\n\tfnstsw\t%0",
3728 "ficom%z2\t%y2\n\tfnstsw\t%0",
3729 "ficomp%z2\t%y2\n\tfnstsw\t%0",
3730 NULL,
3731 NULL
3734 int mask;
3735 const char *ret;
3737 mask = eflags_p << 3;
3738 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
3739 mask |= unordered_p << 1;
3740 mask |= stack_top_dies;
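/* For example, an unordered fcomi compare whose top-of-stack operand
   dies gives mask 8 + 2 + 1 == 11, selecting "fucomip" above. */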
3742 if (mask >= 24)
3743 abort ();
3744 ret = alt[mask];
3745 if (ret == NULL)
3746 abort ();
3748 return ret;
3752 /* Output assembler code to FILE to initialize basic-block profiling.
3754 If profile_block_flag == 2
3756 Output code to call the subroutine `__bb_init_trace_func'
3757 and pass two parameters to it. The first parameter is
3758 the address of a block allocated in the object module.
3759 The second parameter is the number of the first basic block
3760 of the function.
3762 The name of the block is a local symbol made with this statement:
3764 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
3766 Of course, since you are writing the definition of
3767 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3768 can take a short cut in the definition of this macro and use the
3769 name that you know will result.
3771 The number of the first basic block of the function is
3772 passed to the macro in BLOCK_OR_LABEL.
3774 If described in a virtual assembler language the code to be
3775 output looks like:
3777 parameter1 <- LPBX0
3778 parameter2 <- BLOCK_OR_LABEL
3779 call __bb_init_trace_func
3781 else if profile_block_flag != 0
3783 Output code to call the subroutine `__bb_init_func'
3784 and pass a single parameter to it, which is the same
3785 as the first parameter to `__bb_init_trace_func'.
3787 The first word of this parameter is a flag which will be nonzero if
3788 the object module has already been initialized. So test this word
3789 first, and do not call `__bb_init_func' if the flag is nonzero.
3790 Note: When profile_block_flag == 2 the test need not be done
3791 but `__bb_init_trace_func' *must* be called.
3793 BLOCK_OR_LABEL may be used to generate a label number as a
3794 branch destination in case `__bb_init_func' will not be called.
3796 If described in a virtual assembler language the code to be
3797 output looks like:
3799 cmp (LPBX0),0
3800 jne local_label
3801 parameter1 <- LPBX0
3802 call __bb_init_func
3803 local_label:
3806 void
3807 ix86_output_function_block_profiler (file, block_or_label)
3808 FILE *file;
3809 int block_or_label;
3811 static int num_func = 0;
3812 rtx xops[8];
3813 char block_table[80], false_label[80];
3815 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
3817 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
3818 xops[5] = stack_pointer_rtx;
3819 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
3821 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
3823 switch (profile_block_flag)
3825 case 2:
3826 xops[2] = GEN_INT (block_or_label);
3827 xops[3] = gen_rtx_MEM (Pmode,
3828 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
3829 xops[6] = GEN_INT (8);
3831 output_asm_insn ("push{l}\t%2", xops);
3832 if (!flag_pic)
3833 output_asm_insn ("push{l}\t%1", xops);
3834 else
3836 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
3837 output_asm_insn ("push{l}\t%7", xops);
3839 output_asm_insn ("call\t%P3", xops);
3840 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
3841 break;
3843 default:
3844 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
3846 xops[0] = const0_rtx;
3847 xops[2] = gen_rtx_MEM (Pmode,
3848 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
3849 xops[3] = gen_rtx_MEM (Pmode,
3850 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
3851 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
3852 xops[6] = GEN_INT (4);
3854 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
3856 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
3857 output_asm_insn ("jne\t%2", xops);
3859 if (!flag_pic)
3860 output_asm_insn ("push{l}\t%1", xops);
3861 else
3863 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
3864 output_asm_insn ("push{l}\t%7", xops);
3866 output_asm_insn ("call\t%P3", xops);
3867 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
3868 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
3869 num_func++;
3870 break;
3874 /* Output assembler code to FILE to increment a counter associated
3875 with basic block number BLOCKNO.
3877 If profile_block_flag == 2
3879 Output code to initialize the global structure `__bb' and
3880 call the function `__bb_trace_func' which will increment the
3881 counter.
3883 `__bb' consists of two words. In the first word the number
3884 of the basic block has to be stored. In the second word
3885 the address of a block allocated in the object module
3886 has to be stored.
3888 The basic block number is given by BLOCKNO.
3890 The address of the block is given by the label created with
3892 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
3894 by FUNCTION_BLOCK_PROFILER.
3896 Of course, since you are writing the definition of
3897 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3898 can take a short cut in the definition of this macro and use the
3899 name that you know will result.
3901 If described in a virtual assembler language the code to be
3902 output looks like:
3904 move BLOCKNO -> (__bb)
3905 move LPBX0 -> (__bb+4)
3906 call __bb_trace_func
3908 Note that function `__bb_trace_func' must not change the
3909 machine state, especially the flag register. To guarantee
3910 this, you must output code to save and restore registers
3911 either in this macro or in the macros MACHINE_STATE_SAVE
3912 and MACHINE_STATE_RESTORE. The last two macros will be
3913 used in the function `__bb_trace_func', so you must make
3914 sure that the function prologue does not change any
3915 register prior to saving it with MACHINE_STATE_SAVE.
3917 else if profile_block_flag != 0
3919 Output code to increment the counter directly.
3920 Basic blocks are numbered separately from zero within each
3921 compiled object module. The count associated with block number
3922 BLOCKNO is at index BLOCKNO in an array of words; the name of
3923 this array is a local symbol made with this statement:
3925 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
3927 Of course, since you are writing the definition of
3928 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3929 can take a short cut in the definition of this macro and use the
3930 name that you know will result.
3932 If described in a virtual assembler language the code to be
3933 output looks like:
3935 inc (LPBX2+4*BLOCKNO)
3938 void
3939 ix86_output_block_profiler (file, blockno)
3940 FILE *file ATTRIBUTE_UNUSED;
3941 int blockno;
3943 rtx xops[8], cnt_rtx;
3944 char counts[80];
3945 char *block_table = counts;
3947 switch (profile_block_flag)
3949 case 2:
3950 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
3952 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
3953 xops[2] = GEN_INT (blockno);
3954 xops[3] = gen_rtx_MEM (Pmode,
3955 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
3956 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
3957 xops[5] = plus_constant (xops[4], 4);
3958 xops[0] = gen_rtx_MEM (SImode, xops[4]);
3959 xops[6] = gen_rtx_MEM (SImode, xops[5]);
3961 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
3963 output_asm_insn ("pushf", xops);
3964 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3965 if (flag_pic)
3967 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
3968 output_asm_insn ("push{l}\t%7", xops);
3969 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
3970 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
3971 output_asm_insn ("pop{l}\t%7", xops);
3973 else
3974 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
3975 output_asm_insn ("call\t%P3", xops);
3976 output_asm_insn ("popf", xops);
3978 break;
3980 default:
3981 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
3982 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
3983 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
3985 if (blockno)
3986 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
3988 if (flag_pic)
3989 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
3991 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
3992 output_asm_insn ("inc{l}\t%0", xops);
3994 break;
3998 void
3999 ix86_expand_move (mode, operands)
4000 enum machine_mode mode;
4001 rtx operands[];
4003 int strict = (reload_in_progress || reload_completed);
4004 rtx insn;
4006 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
4008 /* Emit insns to move operands[1] into operands[0]. */
4010 if (GET_CODE (operands[0]) == MEM)
4011 operands[1] = force_reg (Pmode, operands[1]);
4012 else
4014 rtx temp = operands[0];
4015 if (GET_CODE (temp) != REG)
4016 temp = gen_reg_rtx (Pmode);
4017 temp = legitimize_pic_address (operands[1], temp);
4018 if (temp == operands[0])
4019 return;
4020 operands[1] = temp;
4023 else
4025 if (GET_CODE (operands[0]) == MEM
4026 && (GET_MODE (operands[0]) == QImode
4027 || !push_operand (operands[0], mode))
4028 && GET_CODE (operands[1]) == MEM)
4029 operands[1] = force_reg (mode, operands[1]);
4031 if (push_operand (operands[0], mode)
4032 && ! general_no_elim_operand (operands[1], mode))
4033 operands[1] = copy_to_mode_reg (mode, operands[1]);
4035 if (FLOAT_MODE_P (mode))
4037 /* If we are loading a floating point constant to a register,
4038 force the value to memory now, since we'll get better code
4039 out the back end. */
4041 if (strict)
4043 else if (GET_CODE (operands[1]) == CONST_DOUBLE
4044 && register_operand (operands[0], mode))
4045 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
4049 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
4051 emit_insn (insn);
4054 /* Attempt to expand a binary operator. Make the expansion closer to
4055 the actual machine than just general_operand, which would allow 3
4056 separate memory references (one output, two inputs) in a single insn. */
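/* For example, (set (mem A) (plus (mem A) (mem B))) is rewritten so
   that B is first loaded into a register, leaving only the single
   matching memory reference the machine can encode. */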
4058 void
4059 ix86_expand_binary_operator (code, mode, operands)
4060 enum rtx_code code;
4061 enum machine_mode mode;
4062 rtx operands[];
4064 int matching_memory;
4065 rtx src1, src2, dst, op, clob;
4067 dst = operands[0];
4068 src1 = operands[1];
4069 src2 = operands[2];
4071 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
4072 if (GET_RTX_CLASS (code) == 'c'
4073 && (rtx_equal_p (dst, src2)
4074 || immediate_operand (src1, mode)))
4076 rtx temp = src1;
4077 src1 = src2;
4078 src2 = temp;
4081 /* If the destination is memory, and we do not have matching source
4082 operands, do things in registers. */
4083 matching_memory = 0;
4084 if (GET_CODE (dst) == MEM)
4086 if (rtx_equal_p (dst, src1))
4087 matching_memory = 1;
4088 else if (GET_RTX_CLASS (code) == 'c'
4089 && rtx_equal_p (dst, src2))
4090 matching_memory = 2;
4091 else
4092 dst = gen_reg_rtx (mode);
4095 /* The source operands cannot both be in memory. */
4096 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
4098 if (matching_memory != 2)
4099 src2 = force_reg (mode, src2);
4100 else
4101 src1 = force_reg (mode, src1);
4104 /* If the operation is not commutative, source 1 cannot be a constant
4105 or non-matching memory. */
4106 if ((CONSTANT_P (src1)
4107 || (!matching_memory && GET_CODE (src1) == MEM))
4108 && GET_RTX_CLASS (code) != 'c')
4109 src1 = force_reg (mode, src1);
4111 /* If optimizing, copy to regs to improve CSE */
4112 if (optimize && !reload_in_progress && !reload_completed)
4114 if (GET_CODE (dst) == MEM)
4115 dst = gen_reg_rtx (mode);
4116 if (GET_CODE (src1) == MEM)
4117 src1 = force_reg (mode, src1);
4118 if (GET_CODE (src2) == MEM)
4119 src2 = force_reg (mode, src2);
4122 /* Emit the instruction. */
4124 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
4125 if (reload_in_progress)
4127 /* Reload doesn't know about the flags register, and doesn't know that
4128 it doesn't want to clobber it. We can only do this with PLUS. */
4129 if (code != PLUS)
4130 abort ();
4131 emit_insn (op);
4133 else
4135 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4136 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4139 /* Fix up the destination if needed. */
4140 if (dst != operands[0])
4141 emit_move_insn (operands[0], dst);
4144 /* Return TRUE or FALSE depending on whether the binary operator meets the
4145 appropriate constraints. */
4147 int
4148 ix86_binary_operator_ok (code, mode, operands)
4149 enum rtx_code code;
4150 enum machine_mode mode ATTRIBUTE_UNUSED;
4151 rtx operands[3];
4153 /* The source operands cannot both be in memory. */
4154 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4155 return 0;
4156 /* If the operation is not commutative, source 1 cannot be a constant. */
4157 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4158 return 0;
4159 /* If the destination is memory, we must have a matching source operand. */
4160 if (GET_CODE (operands[0]) == MEM
4161 && ! (rtx_equal_p (operands[0], operands[1])
4162 || (GET_RTX_CLASS (code) == 'c'
4163 && rtx_equal_p (operands[0], operands[2]))))
4164 return 0;
4165 /* If the operation is not commutative and source 1 is memory, we must
4166 have a matching destination. */
4167 if (GET_CODE (operands[1]) == MEM
4168 && GET_RTX_CLASS (code) != 'c'
4169 && ! rtx_equal_p (operands[0], operands[1]))
4170 return 0;
4171 return 1;
4174 /* Attempt to expand a unary operator. Make the expansion closer to
4175 the actual machine than just general_operand, which would allow 2
4176 separate memory references (one output, one input) in a single insn. */
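/* For example, (set (mem A) (neg (mem B))) with A != B is rewritten
   to negate in a register and store the result, since only a matching
   memory operand can be negated in place. */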
4178 void
4179 ix86_expand_unary_operator (code, mode, operands)
4180 enum rtx_code code;
4181 enum machine_mode mode;
4182 rtx operands[];
4184 int matching_memory;
4185 rtx src, dst, op, clob;
4187 dst = operands[0];
4188 src = operands[1];
4190 /* If the destination is memory, and we do not have matching source
4191 operands, do things in registers. */
4192 matching_memory = 0;
4193 if (GET_CODE (dst) == MEM)
4195 if (rtx_equal_p (dst, src))
4196 matching_memory = 1;
4197 else
4198 dst = gen_reg_rtx (mode);
4201 /* When the source operand is memory, the destination must match. */
4202 if (!matching_memory && GET_CODE (src) == MEM)
4203 src = force_reg (mode, src);
4205 /* If optimizing, copy to regs to improve CSE */
4206 if (optimize && !reload_in_progress && !reload_completed)
4208 if (GET_CODE (dst) == MEM)
4209 dst = gen_reg_rtx (mode);
4210 if (GET_CODE (src) == MEM)
4211 src = force_reg (mode, src);
4214 /* Emit the instruction. */
4216 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
4217 if (reload_in_progress || code == NOT)
4219 /* Reload doesn't know about the flags register, and doesn't know that
4220 it doesn't want to clobber it. */
4221 if (code != NOT)
4222 abort ();
4223 emit_insn (op);
4225 else
4227 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4228 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4231 /* Fix up the destination if needed. */
4232 if (dst != operands[0])
4233 emit_move_insn (operands[0], dst);
4236 /* Return TRUE or FALSE depending on whether the unary operator meets the
4237 appropriate constraints. */
4240 ix86_unary_operator_ok (code, mode, operands)
4241 enum rtx_code code ATTRIBUTE_UNUSED;
4242 enum machine_mode mode ATTRIBUTE_UNUSED;
4243 rtx operands[2] ATTRIBUTE_UNUSED;
4245 /* If one of the operands is memory, source and destination must match. */
4246 if ((GET_CODE (operands[0]) == MEM
4247 || GET_CODE (operands[1]) == MEM)
4248 && ! rtx_equal_p (operands[0], operands[1]))
4249 return FALSE;
4250 return TRUE;
4253 /* Produce an unsigned comparison for a given signed comparison. */
4255 static enum rtx_code
4256 unsigned_comparison (code)
4257 enum rtx_code code;
4259 switch (code)
4261 case GT:
4262 code = GTU;
4263 break;
4264 case LT:
4265 code = LTU;
4266 break;
4267 case GE:
4268 code = GEU;
4269 break;
4270 case LE:
4271 code = LEU;
4272 break;
4273 case EQ:
4274 case NE:
4275 case LEU:
4276 case LTU:
4277 case GEU:
4278 case GTU:
4279 break;
4280 default:
4281 abort ();
4283 return code;
4286 /* Generate insn patterns to do an integer compare of OPERANDS. */
4288 static rtx
4289 ix86_expand_int_compare (code, op0, op1)
4290 enum rtx_code code;
4291 rtx op0, op1;
4293 enum machine_mode cmpmode;
4294 rtx tmp, flags;
4296 cmpmode = SELECT_CC_MODE (code, op0, op1);
4297 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
4299 /* This is very simple, but making the interface the same as in the
4300 FP case makes the rest of the code easier. */
4301 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
4302 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
4304 /* Return the test that should be put into the flags user, i.e.
4305 the bcc, scc, or cmov instruction. */
4306 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
4309 /* Generate insn patterns to do a floating point compare of OPERANDS.
4310 If UNORDERED, allow for unordered compares. */
4312 static rtx
4313 ix86_expand_fp_compare (code, op0, op1, unordered)
4314 enum rtx_code code;
4315 rtx op0, op1;
4316 int unordered;
4318 enum machine_mode fpcmp_mode;
4319 enum machine_mode intcmp_mode;
4320 rtx tmp;
4322 /* When not doing IEEE compliant compares, disable unordered. */
4323 if (! TARGET_IEEE_FP)
4324 unordered = 0;
4325 fpcmp_mode = unordered ? CCFPUmode : CCFPmode;
4327 /* ??? If we knew whether invalid-operand exceptions were masked,
4328 we could rely on fcom to raise an exception and take care of
4329 NaNs. But we don't. We could know this from c9x math bits. */
4330 if (TARGET_IEEE_FP)
4331 unordered = 1;
4333 /* All of the unordered compare instructions only work on registers.
4334 The same is true of the XFmode compare instructions. */
4335 if (unordered || GET_MODE (op0) == XFmode)
4337 op0 = force_reg (GET_MODE (op0), op0);
4338 op1 = force_reg (GET_MODE (op1), op1);
4340 else
4342 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
4343 things around if they appear profitable, otherwise force op0
4344 into a register. */
4346 if (standard_80387_constant_p (op0) == 0
4347 || (GET_CODE (op0) == MEM
4348 && ! (standard_80387_constant_p (op1) == 0
4349 || GET_CODE (op1) == MEM)))
4351 rtx tmp;
4352 tmp = op0, op0 = op1, op1 = tmp;
4353 code = swap_condition (code);
4356 if (GET_CODE (op0) != REG)
4357 op0 = force_reg (GET_MODE (op0), op0);
4359 if (CONSTANT_P (op1))
4361 if (standard_80387_constant_p (op1))
4362 op1 = force_reg (GET_MODE (op1), op1);
4363 else
4364 op1 = validize_mem (force_const_mem (GET_MODE (op1), op1));
4368 /* %%% fcomi is probably always faster, even when dealing with memory,
4369 since compare-and-branch would be three insns instead of four. */
4370 if (TARGET_CMOVE && !unordered)
4372 if (GET_CODE (op0) != REG)
4373 op0 = force_reg (GET_MODE (op0), op0);
4374 if (GET_CODE (op1) != REG)
4375 op1 = force_reg (GET_MODE (op1), op1);
4377 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4378 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
4379 emit_insn (tmp);
4381 /* The FP codes work out to act like unsigned. */
4382 code = unsigned_comparison (code);
4383 intcmp_mode = fpcmp_mode;
4385 else
4387 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
4389 rtx tmp2;
4390 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4391 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
4392 tmp = gen_reg_rtx (HImode);
4393 emit_insn (gen_rtx_SET (VOIDmode, tmp, tmp2));
4395 if (! unordered)
4397 /* We have two options here -- use sahf, or test bits of ah
4398 directly. On PPRO, they are equivalent, sahf being one byte
4399 smaller. On Pentium, sahf is non-pairable while test is UV
4400 pairable. */
4402 if (TARGET_USE_SAHF || optimize_size)
4404 do_sahf:
4406 /* The FP codes work out to act like unsigned. */
4407 code = unsigned_comparison (code);
4408 emit_insn (gen_x86_sahf_1 (tmp));
4409 intcmp_mode = CCmode;
4411 else
4414 * The numbers below correspond to the bits of the FPSW in AH.
4415 * C3, C2, and C0 are in bits 0x40, 0x04, and 0x01 respectively.
4417 * cmp C3 C2 C0
4418 * > 0 0 0
4419 * < 0 0 1
4420 * = 1 0 0
4421 * un 1 1 1
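* Thus, for example, GT tests `ah & 0x41': ZF set means C3 == C0 == 0,
* which matches only the `>' row above, so GT maps to mask 0x41 with
* code EQ in the switch below.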
4424 int mask;
4426 switch (code)
4428 case GT:
4429 mask = 0x41;
4430 code = EQ;
4431 break;
4432 case LT:
4433 mask = 0x01;
4434 code = NE;
4435 break;
4436 case GE:
4437 /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
4438 faster in all cases to just fall back on sahf. */
4439 goto do_sahf;
4440 case LE:
4441 mask = 0x41;
4442 code = NE;
4443 break;
4444 case EQ:
4445 mask = 0x40;
4446 code = NE;
4447 break;
4448 case NE:
4449 mask = 0x40;
4450 code = EQ;
4451 break;
4452 default:
4453 abort ();
4456 emit_insn (gen_testqi_ext_0 (tmp, GEN_INT (mask)));
4457 intcmp_mode = CCNOmode;
4460 else
4462 /* In the unordered case, we have to check C2 for NaN's, which
4463 doesn't happen to work out to anything nice combination-wise.
4464 So do some bit twiddling on the value we've got in AH to come
4465 up with an appropriate set of condition codes. */
4467 intcmp_mode = CCNOmode;
4468 switch (code)
4470 case GT:
4471 emit_insn (gen_testqi_ext_0 (tmp, GEN_INT (0x45)));
4472 code = EQ;
4473 break;
4474 case LT:
4475 emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
4476 emit_insn (gen_cmpqi_ext_3 (tmp, GEN_INT (0x01)));
4477 intcmp_mode = CCmode;
4478 code = EQ;
4479 break;
4480 case GE:
4481 emit_insn (gen_testqi_ext_0 (tmp, GEN_INT (0x05)));
4482 code = EQ;
4483 break;
4484 case LE:
4485 emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
4486 emit_insn (gen_addqi_ext_1 (tmp, tmp, constm1_rtx));
4487 emit_insn (gen_cmpqi_ext_3 (tmp, GEN_INT (0x40)));
4488 intcmp_mode = CCmode;
4489 code = LTU;
4490 break;
4491 case EQ:
4492 emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
4493 emit_insn (gen_cmpqi_ext_3 (tmp, GEN_INT (0x40)));
4494 intcmp_mode = CCmode;
4495 code = EQ;
4496 break;
4497 case NE:
4498 emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
4499 emit_insn (gen_xorqi_cc_ext_1 (tmp, tmp, GEN_INT (0x40)));
4500 code = NE;
4501 break;
4502 default:
4503 abort ();
4508 /* Return the test that should be put into the flags user, i.e.
4509 the bcc, scc, or cmov instruction. */
4510 return gen_rtx_fmt_ee (code, VOIDmode,
4511 gen_rtx_REG (intcmp_mode, FLAGS_REG),
4512 const0_rtx);
4515 static rtx
4516 ix86_expand_compare (code, unordered)
4517 enum rtx_code code;
4518 int unordered;
4520 rtx op0, op1, ret;
4521 op0 = ix86_compare_op0;
4522 op1 = ix86_compare_op1;
4524 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4525 ret = ix86_expand_fp_compare (code, op0, op1, unordered);
4526 else
4527 ret = ix86_expand_int_compare (code, op0, op1);
4529 return ret;
4532 void
4533 ix86_expand_branch (code, unordered, label)
4534 enum rtx_code code;
4535 int unordered;
4536 rtx label;
4538 rtx tmp, lo[2], hi[2], label2;
4539 enum rtx_code code1, code2, code3;
4541 if (GET_MODE (ix86_compare_op0) != DImode)
4543 tmp = ix86_expand_compare (code, unordered);
4544 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4545 gen_rtx_LABEL_REF (VOIDmode, label),
4546 pc_rtx);
4547 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
4548 return;
4551 /* Expand DImode branch into multiple compare+branch. */
4553 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
4555 tmp = ix86_compare_op0;
4556 ix86_compare_op0 = ix86_compare_op1;
4557 ix86_compare_op1 = tmp;
4558 code = swap_condition (code);
4560 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
4561 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
4563 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to avoid
4564 two branches. This costs one extra insn, so disable when optimizing
4565 for size. */
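/* For example, a DImode `a == b' becomes, roughly,
xorl hi(b),hi(a)
xorl lo(b),lo(a)
orl lo(a),hi(a)
jz label
(an illustrative sketch; the actual insns are generated by the
expand_binop calls below). */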
4567 if ((code == EQ || code == NE)
4568 && (!optimize_size
4569 || hi[1] == const0_rtx || lo[1] == const0_rtx))
4571 rtx xor0, xor1;
4573 xor1 = hi[0];
4574 if (hi[1] != const0_rtx)
4576 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
4577 NULL_RTX, 0, OPTAB_WIDEN);
4580 xor0 = lo[0];
4581 if (lo[1] != const0_rtx)
4583 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
4584 NULL_RTX, 0, OPTAB_WIDEN);
4587 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
4588 NULL_RTX, 0, OPTAB_WIDEN);
4590 ix86_compare_op0 = tmp;
4591 ix86_compare_op1 = const0_rtx;
4592 ix86_expand_branch (code, unordered, label);
4593 return;
4596 /* Otherwise, if we are doing a less-than and op1 is a constant whose
4597 low word is zero, then we can just examine the high word. */
4599 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx
4600 && (code == LT || code == LTU))
4602 ix86_compare_op0 = hi[0];
4603 ix86_compare_op1 = hi[1];
4604 ix86_expand_branch (code, unordered, label);
4605 return;
4608 /* Otherwise, we need two or three jumps. */
4610 label2 = gen_label_rtx ();
4612 code1 = code;
4613 code2 = swap_condition (code);
4614 code3 = unsigned_condition (code);
4616 switch (code)
4618 case LT: case GT: case LTU: case GTU:
4619 break;
4621 case LE: code1 = LT; code2 = GT; break;
4622 case GE: code1 = GT; code2 = LT; break;
4623 case LEU: code1 = LTU; code2 = GTU; break;
4624 case GEU: code1 = GTU; code2 = LTU; break;
4626 case EQ: code1 = NIL; code2 = NE; break;
4627 case NE: code2 = NIL; break;
4629 default:
4630 abort ();
4634 * a < b =>
4635 * if (hi(a) < hi(b)) goto true;
4636 * if (hi(a) > hi(b)) goto false;
4637 * if (lo(a) < lo(b)) goto true;
4638 * false:
4641 ix86_compare_op0 = hi[0];
4642 ix86_compare_op1 = hi[1];
4644 if (code1 != NIL)
4645 ix86_expand_branch (code1, unordered, label);
4646 if (code2 != NIL)
4647 ix86_expand_branch (code2, unordered, label2);
4649 ix86_compare_op0 = lo[0];
4650 ix86_compare_op1 = lo[1];
4651 ix86_expand_branch (code3, unordered, label);
4653 if (code2 != NIL)
4654 emit_label (label2);
4658 ix86_expand_setcc (code, unordered, dest)
4659 enum rtx_code code;
4660 int unordered;
4661 rtx dest;
4663 rtx ret, tmp;
4664 int type;
4666 if (GET_MODE (ix86_compare_op0) == DImode)
4667 return 0; /* FAIL */
4669 /* Three modes of generation:
4670 0 -- destination does not overlap compare sources:
4671 clear dest first, emit strict_low_part setcc.
4672 1 -- destination does overlap compare sources:
4673 emit subreg setcc, zero extend.
4674 2 -- destination is in QImode:
4675 emit setcc only.
4678 type = 0;
4680 if (GET_MODE (dest) == QImode)
4681 type = 2;
4682 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
4683 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
4684 type = 1;
4686 if (type == 0)
4687 emit_move_insn (dest, const0_rtx);
4689 ret = ix86_expand_compare (code, unordered);
4690 PUT_MODE (ret, QImode);
4692 tmp = dest;
4693 if (type == 0)
4695 tmp = gen_lowpart (QImode, dest);
4696 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
4698 else if (type == 1)
4700 if (!cse_not_expected)
4701 tmp = gen_reg_rtx (QImode);
4702 else
4703 tmp = gen_lowpart (QImode, dest);
4706 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
4708 if (type == 1)
4710 rtx clob;
4712 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
4713 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
4714 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4715 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
4716 emit_insn (tmp);
4719 return 1; /* DONE */
4723 ix86_expand_int_movcc (operands)
4724 rtx operands[];
4726 enum rtx_code code = GET_CODE (operands[1]), compare_code;
4727 rtx compare_seq, compare_op;
4729 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
4730 When the comparison is done against an immediate, we can sometimes convert
4731 it to LTU or GEU by altering the integer. */
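/* For example, with unsigned x, `x <= 41' (LEU) is the same test as
`x < 42' (LTU); the 0xffffffff check below excludes the one immediate
for which adding 1 would wrap around. */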
4733 if ((code == LEU || code == GTU)
4734 && GET_CODE (ix86_compare_op1) == CONST_INT
4735 && GET_MODE (operands[0]) != HImode
4736 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
4737 && GET_CODE (operands[2]) == CONST_INT
4738 && GET_CODE (operands[3]) == CONST_INT)
4740 if (code == LEU)
4741 code = LTU;
4742 else
4743 code = GEU;
4744 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
4746 start_sequence ();
4747 compare_op = ix86_expand_compare (code, code == EQ || code == NE);
4748 compare_seq = gen_sequence ();
4749 end_sequence ();
4751 compare_code = GET_CODE (compare_op);
4753 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
4754 HImode insns, we'd be swallowed in word prefix ops. */
4756 if (GET_MODE (operands[0]) != HImode
4757 && GET_CODE (operands[2]) == CONST_INT
4758 && GET_CODE (operands[3]) == CONST_INT)
4760 rtx out = operands[0];
4761 HOST_WIDE_INT ct = INTVAL (operands[2]);
4762 HOST_WIDE_INT cf = INTVAL (operands[3]);
4763 HOST_WIDE_INT diff;
4765 if (compare_code == LTU || compare_code == GEU)
4768 /* Detect overlap between destination and compare sources. */
4769 rtx tmp = out;
4771 /* To simplify the rest of the code, restrict to the GEU case. */
4772 if (compare_code == LTU)
4774 HOST_WIDE_INT tmp = ct;
4775 ct = cf;
4776 cf = tmp;
4777 compare_code = reverse_condition (compare_code);
4778 code = reverse_condition (code);
4780 diff = ct - cf;
4782 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
4783 || reg_overlap_mentioned_p (out, ix86_compare_op1))
4784 tmp = gen_reg_rtx (SImode);
4786 emit_insn (compare_seq);
4787 emit_insn (gen_x86_movsicc_0_m1 (tmp));
4789 if (diff == 1)
4792 * cmpl op0,op1
4793 * sbbl dest,dest
4794 * [addl dest, ct]
4796 * Size 5 - 8.
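* (After the cmpl, the carry flag is set iff op0 <u op1, so the sbbl
* leaves dest = -1 when the GEU condition fails and 0 when it holds;
* the addl then yields cf or ct respectively.)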
4798 if (ct)
4799 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
4801 else if (cf == -1)
4804 * cmpl op0,op1
4805 * sbbl dest,dest
4806 * orl $ct, dest
4808 * Size 8.
4810 emit_insn (gen_iorsi3 (tmp, tmp, GEN_INT (ct)));
4812 else if (diff == -1 && ct)
4815 * cmpl op0,op1
4816 * sbbl dest,dest
4817 * xorl $-1, dest
4818 * [addl dest, cf]
4820 * Size 8 - 11.
4822 emit_insn (gen_one_cmplsi2 (tmp, tmp));
4823 if (cf)
4824 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (cf)));
4826 else
4829 * cmpl op0,op1
4830 * sbbl dest,dest
4831 * andl cf - ct, dest
4832 * [addl dest, ct]
4834 * Size 8 - 11.
4836 emit_insn (gen_andsi3 (tmp, tmp, GEN_INT (cf - ct)));
4837 if (ct)
4838 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
4841 if (tmp != out)
4842 emit_move_insn (out, tmp);
4844 return 1; /* DONE */
4847 diff = ct - cf;
4848 if (diff < 0)
4850 HOST_WIDE_INT tmp;
4851 tmp = ct, ct = cf, cf = tmp;
4852 diff = -diff;
4853 compare_code = reverse_condition (compare_code);
4854 code = reverse_condition (code);
4856 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
4857 || diff == 3 || diff == 5 || diff == 9)
4860 * xorl dest,dest
4861 * cmpl op1,op2
4862 * setcc dest
4863 * lea cf(dest*(ct-cf)),dest
4865 * Size 14.
4867 * This also catches the degenerate setcc-only case.
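* For example, ct = 7, cf = 3 gives diff = 4 and we emit
* setcc dest (dest becomes 0 or 1)
* lea 3(,dest,4),dest (dest becomes 3 or 7)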
4870 rtx tmp;
4871 int nops;
4873 out = emit_store_flag (out, code, ix86_compare_op0,
4874 ix86_compare_op1, VOIDmode, 0, 1);
4876 nops = 0;
4877 if (diff == 1)
4878 tmp = out;
4879 else
4881 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
4882 nops++;
4883 if (diff & 1)
4885 tmp = gen_rtx_PLUS (SImode, tmp, out);
4886 nops++;
4889 if (cf != 0)
4891 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
4892 nops++;
4894 if (tmp != out)
4896 if (nops == 0)
4897 emit_move_insn (out, tmp);
4898 else if (nops == 1)
4900 rtx clob;
4902 clob = gen_rtx_REG (CCmode, FLAGS_REG);
4903 clob = gen_rtx_CLOBBER (VOIDmode, clob);
4905 tmp = gen_rtx_SET (VOIDmode, out, tmp);
4906 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
4907 emit_insn (tmp);
4909 else
4910 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
4912 if (out != operands[0])
4913 emit_move_insn (operands[0], out);
4915 return 1; /* DONE */
4919 * General case: Jumpful:
4920 * xorl dest,dest cmpl op1, op2
4921 * cmpl op1, op2 movl ct, dest
4922 * setcc dest jcc 1f
4923 * decl dest movl cf, dest
4924 * andl (cf-ct),dest 1:
4925 * addl ct,dest
4927 * Size 20. Size 14.
4929 * This is reasonably steep, but branch mispredict costs are
4930 * high on modern cpus, so consider failing only if optimizing
4931 * for space.
4933 * %%% Parameterize branch_cost on the tuning architecture, then
4934 * use that. The 80386 couldn't care less about mispredicts.
4937 if (!optimize_size && !TARGET_CMOVE)
4939 if (ct == 0)
4941 ct = cf;
4942 cf = 0;
4943 compare_code = reverse_condition (compare_code);
4944 code = reverse_condition (code);
4947 out = emit_store_flag (out, code, ix86_compare_op0,
4948 ix86_compare_op1, VOIDmode, 0, 1);
4950 emit_insn (gen_addsi3 (out, out, constm1_rtx));
4951 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
4952 if (ct != 0)
4953 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
4954 if (out != operands[0])
4955 emit_move_insn (operands[0], out);
4957 return 1; /* DONE */
4961 if (!TARGET_CMOVE)
4963 /* Try a few more things with specific constants and a variable. */
4965 optab op;
4966 rtx var, orig_out, out, tmp;
4968 if (optimize_size)
4969 return 0; /* FAIL */
4971 /* If one of the two operands is an interesting constant (0 or -1), build
4972 a 0/-1 mask with the code above and mask the variable in with a logical operation. */
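/* For example, `(cond ? 0 : var)' is handled by computing `(cond ? 0 : -1)'
into a temporary via the recursive call below and then ANDing VAR into
it; the -1 cases use IOR instead. */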
4974 if (GET_CODE (operands[2]) == CONST_INT)
4976 var = operands[3];
4977 if (INTVAL (operands[2]) == 0)
4978 operands[3] = constm1_rtx, op = and_optab;
4979 else if (INTVAL (operands[2]) == -1)
4980 operands[3] = const0_rtx, op = ior_optab;
4981 else
4982 return 0; /* FAIL */
4984 else if (GET_CODE (operands[3]) == CONST_INT)
4986 var = operands[2];
4987 if (INTVAL (operands[3]) == 0)
4988 operands[2] = constm1_rtx, op = and_optab;
4989 else if (INTVAL (operands[3]) == -1)
4990 operands[2] = const0_rtx, op = ior_optab;
4991 else
4992 return 0; /* FAIL */
4994 else
4995 return 0; /* FAIL */
4997 orig_out = operands[0];
4998 tmp = gen_reg_rtx (GET_MODE (orig_out));
4999 operands[0] = tmp;
5001 /* Recurse to get the constant loaded. */
5002 if (ix86_expand_int_movcc (operands) == 0)
5003 return 0; /* FAIL */
5005 /* Mask in the interesting variable. */
5006 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
5007 OPTAB_WIDEN);
5008 if (out != orig_out)
5009 emit_move_insn (orig_out, out);
5011 return 1; /* DONE */
5015 * For comparison with above,
5017 * movl cf,dest
5018 * movl ct,tmp
5019 * cmpl op1,op2
5020 * cmovcc tmp,dest
5022 * Size 15.
5025 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
5026 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
5027 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
5028 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
5030 emit_insn (compare_seq);
5031 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5032 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5033 compare_op, operands[2],
5034 operands[3])));
5036 return 1; /* DONE */
5040 ix86_expand_fp_movcc (operands)
5041 rtx operands[];
5043 enum rtx_code code;
5044 enum machine_mode mode;
5045 rtx tmp;
5047 /* The floating point conditional move instructions don't directly
5048 support conditions resulting from a signed integer comparison. */
5050 code = GET_CODE (operands[1]);
5051 switch (code)
5053 case LT:
5054 case LE:
5055 case GE:
5056 case GT:
5057 tmp = gen_reg_rtx (QImode);
5058 ix86_expand_setcc (code, 0, tmp);
5059 code = NE;
5060 ix86_compare_op0 = tmp;
5061 ix86_compare_op1 = const0_rtx;
5062 break;
5064 default:
5065 break;
5068 mode = SELECT_CC_MODE (code, ix86_compare_op0, ix86_compare_op1);
5069 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (mode, FLAGS_REG),
5070 gen_rtx_COMPARE (mode,
5071 ix86_compare_op0,
5072 ix86_compare_op1)));
5073 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5074 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5075 gen_rtx_fmt_ee (code, VOIDmode,
5076 gen_rtx_REG (mode, FLAGS_REG),
5077 const0_rtx),
5078 operands[2],
5079 operands[3])));
5081 return 1;
5084 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
5085 works for floating point parameters and non-offsettable memories.
5086 For pushes, it returns just stack offsets; the values will be saved
5087 in the right order. At most three parts are generated. */
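/* For example, an offsettable DFmode memory operand yields parts[0] =
the SImode word at offset 0 and parts[1] = the word at offset 4;
XFmode adds parts[2] at offset 8. */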
5089 static void
5090 ix86_split_to_parts (operand, parts, mode)
5091 rtx operand;
5092 rtx *parts;
5093 enum machine_mode mode;
5095 int size = GET_MODE_SIZE (mode) / 4;
5097 if (size < 2 || size > 3)
5098 abort ();
5100 /* Optimize constant pool references into immediates. This is used by fp moves,
5101 which force all constants to memory to allow combining. */
5103 if (GET_CODE (operand) == MEM
5104 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
5105 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
5106 operand = get_pool_constant (XEXP (operand, 0));
5108 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
5110 /* The only non-offsettable memories we handle are pushes. */
5111 if (! push_operand (operand, VOIDmode))
5112 abort ();
5114 PUT_MODE (operand, SImode);
5115 parts[0] = parts[1] = parts[2] = operand;
5117 else
5119 if (mode == DImode)
5120 split_di (&operand, 1, &parts[0], &parts[1]);
5121 else
5123 if (REG_P (operand))
5125 if (!reload_completed)
5126 abort ();
5127 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
5128 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
5129 if (size == 3)
5130 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
5132 else if (offsettable_memref_p (operand))
5134 PUT_MODE (operand, SImode);
5135 parts[0] = operand;
5136 parts[1] = adj_offsettable_operand (operand, 4);
5137 if (size == 3)
5138 parts[2] = adj_offsettable_operand (operand, 8);
5140 else if (GET_CODE (operand) == CONST_DOUBLE)
5142 REAL_VALUE_TYPE r;
5143 long l[3];
5145 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
5146 switch (mode)
5148 case XFmode:
5149 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
5150 parts[2] = GEN_INT (l[2]);
5151 break;
5152 case DFmode:
5153 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
5154 break;
5155 default:
5156 abort ();
5158 parts[1] = GEN_INT (l[1]);
5159 parts[0] = GEN_INT (l[0]);
5161 else
5162 abort ();
5166 return;
5169 /* Emit insns to perform a move or push of DI, DF, and XF values.
5170 Return false when normal moves are needed; true when all required
5171 insns have been emitted. Operands 2-4 receive the destination parts
5172 in the correct order; operands 5-7 receive the source parts. */
5174 int
5175 ix86_split_long_move (operands1)
5176 rtx operands1[];
5178 rtx part[2][3];
5179 rtx operands[2];
5180 int size = GET_MODE_SIZE (GET_MODE (operands1[0])) / 4;
5181 int push = 0;
5182 int collisions = 0;
5184 /* Make our own copy to avoid clobbering the operands. */
5185 operands[0] = copy_rtx (operands1[0]);
5186 operands[1] = copy_rtx (operands1[1]);
5188 if (size < 2 || size > 3)
5189 abort ();
5191 /* The only non-offsettable memory we handle is a push. */
5192 if (push_operand (operands[0], VOIDmode))
5193 push = 1;
5194 else if (GET_CODE (operands[0]) == MEM
5195 && ! offsettable_memref_p (operands[0]))
5196 abort ();
5198 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
5199 ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
5201 /* When emitting a push, compensate for source operands that live on the stack, since each push moves the stack pointer. */
5202 if (push && GET_CODE (operands[1]) == MEM
5203 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
5205 if (size == 3)
5206 part[1][1] = part[1][2];
5207 part[1][0] = part[1][1];
5210 /* We need to do the copy in the right order in case an address register
5211 of the source overlaps the destination. */
5212 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
5214 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
5215 collisions++;
5216 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
5217 collisions++;
5218 if (size == 3
5219 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
5220 collisions++;
5222 /* Collision in the middle part can be handled by reordering. */
5223 if (collisions == 1 && size == 3
5224 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
5226 rtx tmp;
5227 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
5228 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
5231 /* If there are more collisions, we can't handle it by reordering.
5232 Do an lea to the last part and use only one colliding move. */
5233 else if (collisions > 1)
5235 collisions = 1;
5236 emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
5237 XEXP (part[1][0], 0)));
5238 part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
5239 part[1][1] = adj_offsettable_operand (part[1][0], 4);
5240 if (size == 3)
5241 part[1][2] = adj_offsettable_operand (part[1][0], 8);
5245 if (push)
5247 if (size == 3)
5248 emit_insn (gen_push (part[1][2]));
5249 emit_insn (gen_push (part[1][1]));
5250 emit_insn (gen_push (part[1][0]));
5251 return 1;
5254 /* Choose the correct order so as not to overwrite the source before it is copied. */
5255 if ((REG_P (part[0][0])
5256 && REG_P (part[1][1])
5257 && (REGNO (part[0][0]) == REGNO (part[1][1])
5258 || (size == 3
5259 && REGNO (part[0][0]) == REGNO (part[1][2]))))
5260 || (collisions > 0
5261 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
5263 if (size == 3)
5265 operands1[2] = part[0][2];
5266 operands1[3] = part[0][1];
5267 operands1[4] = part[0][0];
5268 operands1[5] = part[1][2];
5269 operands1[6] = part[1][1];
5270 operands1[7] = part[1][0];
5272 else
5274 operands1[2] = part[0][1];
5275 operands1[3] = part[0][0];
5276 operands1[5] = part[1][1];
5277 operands1[6] = part[1][0];
5280 else
5282 if (size == 3)
5284 operands1[2] = part[0][0];
5285 operands1[3] = part[0][1];
5286 operands1[4] = part[0][2];
5287 operands1[5] = part[1][0];
5288 operands1[6] = part[1][1];
5289 operands1[7] = part[1][2];
5291 else
5293 operands1[2] = part[0][0];
5294 operands1[3] = part[0][1];
5295 operands1[5] = part[1][0];
5296 operands1[6] = part[1][1];
5300 return 0;
5303 void
5304 ix86_split_ashldi (operands, scratch)
5305 rtx *operands, scratch;
5307 rtx low[2], high[2];
5308 int count;
5310 if (GET_CODE (operands[2]) == CONST_INT)
5312 split_di (operands, 2, low, high);
5313 count = INTVAL (operands[2]) & 63;
5315 if (count >= 32)
5317 emit_move_insn (high[0], low[1]);
5318 emit_move_insn (low[0], const0_rtx);
5320 if (count > 32)
5321 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
5323 else
5325 if (!rtx_equal_p (operands[0], operands[1]))
5326 emit_move_insn (operands[0], operands[1]);
5327 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
5328 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
5331 else
5333 if (!rtx_equal_p (operands[0], operands[1]))
5334 emit_move_insn (operands[0], operands[1]);
5336 split_di (operands, 1, low, high);
5338 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
5339 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
5341 if (TARGET_CMOVE && (! reload_completed || scratch))
5343 if (! reload_completed)
5344 scratch = force_reg (SImode, const0_rtx);
5345 else
5346 emit_move_insn (scratch, const0_rtx);
5348 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
5349 scratch));
5351 else
5352 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
5356 void
5357 ix86_split_ashrdi (operands, scratch)
5358 rtx *operands, scratch;
5360 rtx low[2], high[2];
5361 int count;
5363 if (GET_CODE (operands[2]) == CONST_INT)
5365 split_di (operands, 2, low, high);
5366 count = INTVAL (operands[2]) & 63;
5368 if (count >= 32)
5370 emit_move_insn (low[0], high[1]);
5372 if (! reload_completed)
5373 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
5374 else
5376 emit_move_insn (high[0], low[0]);
5377 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
5380 if (count > 32)
5381 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
5383 else
5385 if (!rtx_equal_p (operands[0], operands[1]))
5386 emit_move_insn (operands[0], operands[1]);
5387 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5388 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
5391 else
5393 if (!rtx_equal_p (operands[0], operands[1]))
5394 emit_move_insn (operands[0], operands[1]);
5396 split_di (operands, 1, low, high);
5398 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5399 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
5401 if (TARGET_CMOVE && (!reload_completed || scratch))
5403 if (! reload_completed)
5404 scratch = gen_reg_rtx (SImode);
5405 emit_move_insn (scratch, high[0]);
5406 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
5407 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5408 scratch));
5410 else
5411 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
5415 void
5416 ix86_split_lshrdi (operands, scratch)
5417 rtx *operands, scratch;
5419 rtx low[2], high[2];
5420 int count;
5422 if (GET_CODE (operands[2]) == CONST_INT)
5424 split_di (operands, 2, low, high);
5425 count = INTVAL (operands[2]) & 63;
5427 if (count >= 32)
5429 emit_move_insn (low[0], high[1]);
5430 emit_move_insn (high[0], const0_rtx);
5432 if (count > 32)
5433 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
5435 else
5437 if (!rtx_equal_p (operands[0], operands[1]))
5438 emit_move_insn (operands[0], operands[1]);
5439 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5440 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
5443 else
5445 if (!rtx_equal_p (operands[0], operands[1]))
5446 emit_move_insn (operands[0], operands[1]);
5448 split_di (operands, 1, low, high);
5450 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5451 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
5453 /* Heh. By reversing the arguments, we can reuse this pattern. */
5454 if (TARGET_CMOVE && (! reload_completed || scratch))
5456 if (! reload_completed)
5457 scratch = force_reg (SImode, const0_rtx);
5458 else
5459 emit_move_insn (scratch, const0_rtx);
5461 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5462 scratch));
5464 else
5465 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
5469 /* Expand the appropriate insns for doing strlen if not just doing
5470 repnz; scasb
5472 out = result, initialized with the start address
5473 align_rtx = alignment of the address.
5474 scratch = scratch register, initialized with the start address when
5475 not aligned, otherwise undefined
5477 This is just the body. It needs the initializations mentioned above and
5478 some address computation at the end. These things are done in i386.md. */
5480 void
5481 ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
5482 rtx out, align_rtx, scratch;
5484 int align;
5485 rtx tmp;
5486 rtx align_2_label = NULL_RTX;
5487 rtx align_3_label = NULL_RTX;
5488 rtx align_4_label = gen_label_rtx ();
5489 rtx end_0_label = gen_label_rtx ();
5490 rtx mem;
5491 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
5492 rtx tmpreg = gen_reg_rtx (SImode);
5494 align = 0;
5495 if (GET_CODE (align_rtx) == CONST_INT)
5496 align = INTVAL (align_rtx);
5498 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
5500 /* Is there a known alignment and is it less than 4? */
5501 if (align < 4)
5503 /* Is there a known alignment and is it not 2? */
5504 if (align != 2)
5506 align_3_label = gen_label_rtx (); /* Label when address is 3 mod 4 */
5507 align_2_label = gen_label_rtx (); /* Label when address is 2 mod 4 */
5509 /* Leave just the two lower bits of the address. */
5510 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
5511 NULL_RTX, 0, OPTAB_WIDEN);
5513 emit_insn (gen_cmpsi_0 (align_rtx, const0_rtx));
5515 tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
5516 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5517 gen_rtx_LABEL_REF (VOIDmode,
5518 align_4_label),
5519 pc_rtx);
5520 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5522 emit_insn (gen_cmpsi_1 (align_rtx, GEN_INT (2)));
5524 tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
5525 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5526 gen_rtx_LABEL_REF (VOIDmode,
5527 align_2_label),
5528 pc_rtx);
5529 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5531 tmp = gen_rtx_GTU (VOIDmode, flags, const0_rtx);
5532 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5533 gen_rtx_LABEL_REF (VOIDmode,
5534 align_3_label),
5535 pc_rtx);
5536 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5538 else
5540 /* Since the alignment is 2, we have to check 0 or 2 bytes;
5541 check whether the address is 4-byte aligned. */
5543 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
5544 NULL_RTX, 0, OPTAB_WIDEN);
5546 emit_insn (gen_cmpsi_0 (align_rtx, const0_rtx));
5548 tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
5549 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5550 gen_rtx_LABEL_REF (VOIDmode,
5551 align_4_label),
5552 pc_rtx);
5553 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5556 mem = gen_rtx_MEM (QImode, out);
5558 /* Now compare the bytes. */
5560 /* Compare the first 1..3 unaligned bytes one byte at a time. */
5561 emit_insn (gen_cmpqi_0 (mem, const0_rtx));
5563 tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
5564 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5565 gen_rtx_LABEL_REF (VOIDmode, end_0_label),
5566 pc_rtx);
5567 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5569 /* Increment the address. */
5570 emit_insn (gen_addsi3 (out, out, const1_rtx));
5572 /* Not needed with an alignment of 2 */
5573 if (align != 2)
5575 emit_label (align_2_label);
5577 emit_insn (gen_cmpqi_0 (mem, const0_rtx));
5579 tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
5580 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5581 gen_rtx_LABEL_REF (VOIDmode,
5582 end_0_label),
5583 pc_rtx);
5584 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5586 emit_insn (gen_addsi3 (out, out, const1_rtx));
5588 emit_label (align_3_label);
5591 emit_insn (gen_cmpqi_0 (mem, const0_rtx));
5593 tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
5594 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5595 gen_rtx_LABEL_REF (VOIDmode, end_0_label),
5596 pc_rtx);
5597 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5599 emit_insn (gen_addsi3 (out, out, const1_rtx));
5602 /* Generate a loop to check 4 bytes at a time. It is not a good idea
5603 to align this loop; that only makes the program larger without
5604 speeding it up. */
5605 emit_label (align_4_label);
5607 mem = gen_rtx_MEM (SImode, out);
5608 emit_move_insn (scratch, mem);
5609 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
5611 /* This formula yields a nonzero result iff one of the bytes is zero.
5612 This saves three branches inside the loop and many cycles. */
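/* That is, tmpreg = ((scratch - 0x01010101) & ~scratch) & 0x80808080.
For example, scratch = 0x41004141 gives 0x3fff4040 & 0xbeffbebe
& 0x80808080 = 0x00800000. Bits may also leak into bytes above the
first zero byte, but the least significant 0x80 bit set always marks
the first zero, which is what the fixup code below relies on. */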
5614 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
5615 emit_insn (gen_one_cmplsi2 (scratch, scratch));
5616 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
5617 emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
5618 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 0, align_4_label);
5620 if (TARGET_CMOVE)
5622 rtx reg = gen_reg_rtx (SImode);
5623 emit_move_insn (reg, tmpreg);
5624 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
5626 /* If zero is not in the first two bytes, move two bytes forward. */
5627 emit_insn (gen_testsi_1 (tmpreg, GEN_INT (0x8080)));
5628 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
5629 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
5630 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
5631 gen_rtx_IF_THEN_ELSE (SImode, tmp,
5632 reg,
5633 tmpreg)));
5634 /* Emit the lea manually to avoid clobbering the flags. */
5635 emit_insn (gen_rtx_SET (SImode, reg,
5636 gen_rtx_PLUS (SImode, out, GEN_INT (2))));
5638 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
5639 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
5640 emit_insn (gen_rtx_SET (VOIDmode, out,
5641 gen_rtx_IF_THEN_ELSE (SImode, tmp,
5642 reg,
5643 out)));
5646 else
5648 rtx end_2_label = gen_label_rtx ();
5649 /* Is zero in the first two bytes? */
5651 emit_insn (gen_testsi_1 (tmpreg, GEN_INT (0x8080)));
5652 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
5653 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
5654 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5655 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
5656 pc_rtx);
5657 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5658 JUMP_LABEL (tmp) = end_2_label;
5660 /* Not in the first two. Move two bytes forward. */
5661 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
5662 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
5664 emit_label (end_2_label);
5668 /* Avoid a branch when fixing up the final byte position. */
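/* At this point OUT is 4 (or, after the two-byte step above, 6) past the
start of the word containing the zero, and bit 7 of TMPREG is set iff
the first of the two remaining candidate bytes is zero. Doubling TMPREG
copies that bit into the carry flag, so OUT - 3 - CF is the address of
the terminating zero byte. */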
5669 tmpreg = gen_lowpart (QImode, tmpreg);
5670 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
5671 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
5673 emit_label (end_0_label);
5676 /* Clear stack slot assignments remembered from previous functions.
5677 This is called from INIT_EXPANDERS once before RTL is emitted for each
5678 function. */
5680 static void
5681 ix86_init_machine_status (p)
5682 struct function *p;
5684 enum machine_mode mode;
5685 int n;
5686 p->machine
5687 = (struct machine_function *) xmalloc (sizeof (struct machine_function));
5689 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
5690 mode = (enum machine_mode) ((int) mode + 1))
5691 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
5692 ix86_stack_locals[(int) mode][n] = NULL_RTX;
5695 /* Mark machine specific bits of P for GC. */
5696 static void
5697 ix86_mark_machine_status (p)
5698 struct function *p;
5700 enum machine_mode mode;
5701 int n;
5703 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
5704 mode = (enum machine_mode) ((int) mode + 1))
5705 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
5706 ggc_mark_rtx (p->machine->stack_locals[(int) mode][n]);
5709 /* Return a MEM corresponding to a stack slot with mode MODE.
5710 Allocate a new slot if necessary.
5712 The RTL for a function can have several slots available: N is
5713 which slot to use. */
5716 assign_386_stack_local (mode, n)
5717 enum machine_mode mode;
5718 int n;
5720 if (n < 0 || n >= MAX_386_STACK_LOCALS)
5721 abort ();
5723 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
5724 ix86_stack_locals[(int) mode][n]
5725 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
5727 return ix86_stack_locals[(int) mode][n];
5730 /* Calculate the length of the memory address in the instruction
5731 encoding. Does not include the one-byte modrm, opcode, or prefix. */
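/* For instance, `4(%ebp)' needs only a one-byte displacement, so this
returns 1, while `(%ebx,%esi,4)' needs an SIB byte but no displacement,
which also counts as 1. */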
5733 static int
5734 memory_address_length (addr)
5735 rtx addr;
5737 struct ix86_address parts;
5738 rtx base, index, disp;
5739 int len;
5741 if (GET_CODE (addr) == PRE_DEC
5742 || GET_CODE (addr) == POST_INC)
5743 return 0;
5745 if (! ix86_decompose_address (addr, &parts))
5746 abort ();
5748 base = parts.base;
5749 index = parts.index;
5750 disp = parts.disp;
5751 len = 0;
5753 /* Register Indirect. */
5754 if (base && !index && !disp)
5756 /* Special cases: ebp and esp need the two-byte modrm form (esp always needs an SIB byte; plain ebp needs a disp8). */
5757 if (addr == stack_pointer_rtx
5758 || addr == arg_pointer_rtx
5759 || addr == frame_pointer_rtx
5760 || addr == hard_frame_pointer_rtx)
5761 len = 1;
5764 /* Direct Addressing. */
5765 else if (disp && !base && !index)
5766 len = 4;
5768 else
5770 /* Find the length of the displacement constant. */
5771 if (disp)
5773 if (GET_CODE (disp) == CONST_INT
5774 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
5775 len = 1;
5776 else
5777 len = 4;
5780 /* An index requires the two-byte modrm form. */
5781 if (index)
5782 len += 1;
5785 return len;
5789 ix86_attr_length_default (insn)
5790 rtx insn;
5792 enum attr_type type;
5793 int len = 0, i;
5795 type = get_attr_type (insn);
5796 extract_insn (insn);
5797 switch (type)
5799 case TYPE_INCDEC:
5800 case TYPE_SETCC:
5801 case TYPE_ICMOV:
5802 case TYPE_FMOV:
5803 case TYPE_FOP:
5804 case TYPE_FCMP:
5805 case TYPE_FOP1:
5806 case TYPE_FMUL:
5807 case TYPE_FDIV:
5808 case TYPE_FSGN:
5809 case TYPE_FPSPC:
5810 case TYPE_FCMOV:
5811 case TYPE_IBR:
5812 break;
5813 case TYPE_STR:
5814 case TYPE_CLD:
5815 len = 0;
5817 case TYPE_ALU1:
5818 case TYPE_NEGNOT:
5819 case TYPE_ALU:
5820 case TYPE_ICMP:
5821 case TYPE_IMOVX:
5822 case TYPE_ISHIFT:
5823 case TYPE_IMUL:
5824 case TYPE_IDIV:
5825 case TYPE_PUSH:
5826 case TYPE_POP:
5827 for (i = recog_data.n_operands - 1; i >= 0; --i)
5828 if (CONSTANT_P (recog_data.operand[i]))
5830 if (GET_CODE (recog_data.operand[i]) == CONST_INT
5831 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
5832 len += 1;
5833 else
5834 len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
5836 break;
5838 case TYPE_IMOV:
5839 if (CONSTANT_P (recog_data.operand[1]))
5840 len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
5841 break;
5843 case TYPE_CALL:
5844 if (constant_call_address_operand (recog_data.operand[0],
5845 GET_MODE (recog_data.operand[0])))
5846 return 5;
5847 break;
5849 case TYPE_CALLV:
5850 if (constant_call_address_operand (recog_data.operand[1],
5851 GET_MODE (recog_data.operand[1])))
5852 return 5;
5853 break;
5855 case TYPE_LEA:
5857 /* Irritatingly, single_set doesn't work with REG_UNUSED present,
5858 as we'll get from running life_analysis during reg-stack when
5859 not optimizing. Not that it matters anyway, now that
5860 pro_epilogue_adjust_stack uses lea, and is by design not
5861 single_set. */
5862 rtx set = PATTERN (insn);
5863 if (GET_CODE (set) == SET)
5864 ;
5865 else if (GET_CODE (set) == PARALLEL
5866 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
5867 set = XVECEXP (set, 0, 0);
5868 else
5869 abort ();
5871 len += memory_address_length (SET_SRC (set));
5872 goto just_opcode;
5875 case TYPE_OTHER:
5876 case TYPE_MULTI:
5877 return 15;
5879 case TYPE_FXCH:
5880 if (STACK_TOP_P (recog_data.operand[0]))
5881 return 2 + (REGNO (recog_data.operand[1]) != FIRST_STACK_REG + 1);
5882 else
5883 return 2 + (REGNO (recog_data.operand[0]) != FIRST_STACK_REG + 1);
5885 default:
5886 abort ();
5889 for (i = recog_data.n_operands - 1; i >= 0; --i)
5890 if (GET_CODE (recog_data.operand[i]) == MEM)
5892 len += memory_address_length (XEXP (recog_data.operand[i], 0));
5893 break;
5896 just_opcode:
5897 len += get_attr_length_opcode (insn);
5898 len += get_attr_length_prefix (insn);
5900 return len;
5903 /* Return the maximum number of instructions a cpu can issue. */
5906 ix86_issue_rate ()
5908 switch (ix86_cpu)
5910 case PROCESSOR_PENTIUM:
5911 case PROCESSOR_K6:
5912 return 2;
5914 case PROCESSOR_PENTIUMPRO:
5915 return 3;
5917 default:
5918 return 1;
5922 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
5923 by DEP_INSN and reads nothing else set by DEP_INSN. */
5925 static int
5926 ix86_flags_dependant (insn, dep_insn, insn_type)
5927 rtx insn, dep_insn;
5928 enum attr_type insn_type;
5930 rtx set, set2;
5932 /* Simplify the test for uninteresting insns. */
5933 if (insn_type != TYPE_SETCC
5934 && insn_type != TYPE_ICMOV
5935 && insn_type != TYPE_FCMOV
5936 && insn_type != TYPE_IBR)
5937 return 0;
5939 if ((set = single_set (dep_insn)) != 0)
5941 set = SET_DEST (set);
5942 set2 = NULL_RTX;
5944 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
5945 && XVECLEN (PATTERN (dep_insn), 0) == 2
5946 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
5947 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
5949 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
5950 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
5952 else
5953 return 0;
5955 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
5956 return 0;
5958 /* This test is true if the dependant insn reads the flags but
5959 not any other potentially set register. */
5960 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
5961 return 0;
5963 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
5964 return 0;
5966 return 1;
5969 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
5970 address with operands set by DEP_INSN. */
5972 static int
5973 ix86_agi_dependant (insn, dep_insn, insn_type)
5974 rtx insn, dep_insn;
5975 enum attr_type insn_type;
5977 rtx addr;
5979 if (insn_type == TYPE_LEA)
5981 addr = PATTERN (insn);
5982 if (GET_CODE (addr) == SET)
5983 ;
5984 else if (GET_CODE (addr) == PARALLEL
5985 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
5986 addr = XVECEXP (addr, 0, 0);
5987 else
5988 abort ();
5989 addr = SET_SRC (addr);
5991 else
5993 int i;
5994 extract_insn (insn);
5995 for (i = recog_data.n_operands - 1; i >= 0; --i)
5996 if (GET_CODE (recog_data.operand[i]) == MEM)
5998 addr = XEXP (recog_data.operand[i], 0);
5999 goto found;
6001 return 0;
6002 found:;
6005 return modified_in_p (addr, dep_insn);
6009 ix86_adjust_cost (insn, link, dep_insn, cost)
6010 rtx insn, link, dep_insn;
6011 int cost;
6013 enum attr_type insn_type, dep_insn_type;
6014 rtx set, set2;
6015 int dep_insn_code_number;
6017 /* Anti and output dependencies have zero cost on all CPUs. */
6018 if (REG_NOTE_KIND (link) != 0)
6019 return 0;
6021 dep_insn_code_number = recog_memoized (dep_insn);
6023 /* If we can't recognize the insns, we can't really do anything. */
6024 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
6025 return cost;
6027 insn_type = get_attr_type (insn);
6028 dep_insn_type = get_attr_type (dep_insn);
6030 /* Prologue and epilogue allocators can have a false dependency on ebp.
6031 This results in one cycle extra stall on Pentium prologue scheduling,
6032 so handle this important case manually. */
6033 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
6034 && dep_insn_type == TYPE_ALU
6035 && !reg_mentioned_p (stack_pointer_rtx, insn))
6036 return 0;
6038 switch (ix86_cpu)
6040 case PROCESSOR_PENTIUM:
6041 /* Address Generation Interlock adds a cycle of latency. */
6042 if (ix86_agi_dependant (insn, dep_insn, insn_type))
6043 cost += 1;
6045 /* ??? Compares pair with jump/setcc. */
6046 if (ix86_flags_dependant (insn, dep_insn, insn_type))
6047 cost = 0;
6049 /* Floating point stores require the value to be ready one cycle earlier. */
6050 if (insn_type == TYPE_FMOV
6051 && get_attr_memory (insn) == MEMORY_STORE
6052 && !ix86_agi_dependant (insn, dep_insn, insn_type))
6053 cost += 1;
6054 break;
6056 case PROCESSOR_PENTIUMPRO:
6057 /* Since we can't represent delayed latencies of load+operation,
6058 increase the cost here for non-imov insns. */
6059 if (dep_insn_type != TYPE_IMOV
6060 && dep_insn_type != TYPE_FMOV
6061 && get_attr_memory (dep_insn) == MEMORY_LOAD)
6062 cost += 1;
6064 /* INT->FP conversion is expensive. */
6065 if (get_attr_fp_int_src (dep_insn))
6066 cost += 5;
6068 /* There is one cycle extra latency between an FP op and a store. */
6069 if (insn_type == TYPE_FMOV
6070 && (set = single_set (dep_insn)) != NULL_RTX
6071 && (set2 = single_set (insn)) != NULL_RTX
6072 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
6073 && GET_CODE (SET_DEST (set2)) == MEM)
6074 cost += 1;
6075 break;
6077 case PROCESSOR_K6:
6078 /* The esp dependency is resolved before the instruction is really
6079 finished. */
6080 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
6081 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
6082 return 1;
6084 /* Since we can't represent delayed latencies of load+operation,
6085 increase the cost here for non-imov insns. */
6086 if (get_attr_memory (dep_insn) == MEMORY_LOAD)
6087 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
6089 /* INT->FP conversion is expensive. */
6090 if (get_attr_fp_int_src (dep_insn))
6091 cost += 5;
6092 break;
6094 case PROCESSOR_ATHLON:
6095 /* Address Generation Interlocks cause problems on the Athlon CPU because
6096 the loads and stores are done in order, so once one load or store has
6097 to wait, others must too; penalize AGIs slightly by one cycle.
6098 We might experiment with this value later. */
6099 if (ix86_agi_dependant (insn, dep_insn, insn_type))
6100 cost += 1;
6102 /* Since we can't represent delayed latencies of load+operation,
6103 increase the cost here for non-imov insns. */
6104 if (dep_insn_type != TYPE_IMOV
6105 && dep_insn_type != TYPE_FMOV
6106 && get_attr_memory (dep_insn) == MEMORY_LOAD)
6107 cost += 2;
6108 default:
6109 break;
6112 return cost;
6115 static union
6117 struct ppro_sched_data
6119 rtx decode[3];
6120 int issued_this_cycle;
6121 } ppro;
6122 } ix86_sched_data;
6124 static int
6125 ix86_safe_length (insn)
6126 rtx insn;
6128 if (recog_memoized (insn) >= 0)
6129 return get_attr_length(insn);
6130 else
6131 return 128;
6134 static int
6135 ix86_safe_length_prefix (insn)
6136 rtx insn;
6138 if (recog_memoized (insn) >= 0)
6139 return get_attr_length_prefix (insn);
6140 else
6141 return 0;
6144 static enum attr_memory
6145 ix86_safe_memory (insn)
6146 rtx insn;
6148 if (recog_memoized (insn) >= 0)
6149 return get_attr_memory(insn);
6150 else
6151 return MEMORY_UNKNOWN;
6154 static enum attr_pent_pair
6155 ix86_safe_pent_pair (insn)
6156 rtx insn;
6158 if (recog_memoized (insn) >= 0)
6159 return get_attr_pent_pair(insn);
6160 else
6161 return PENT_PAIR_NP;
6164 static enum attr_ppro_uops
6165 ix86_safe_ppro_uops (insn)
6166 rtx insn;
6168 if (recog_memoized (insn) >= 0)
6169 return get_attr_ppro_uops (insn);
6170 else
6171 return PPRO_UOPS_MANY;
6174 static void
6175 ix86_dump_ppro_packet (dump)
6176 FILE *dump;
6178 if (ix86_sched_data.ppro.decode[0])
6180 fprintf (dump, "PPRO packet: %d",
6181 INSN_UID (ix86_sched_data.ppro.decode[0]));
6182 if (ix86_sched_data.ppro.decode[1])
6183 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
6184 if (ix86_sched_data.ppro.decode[2])
6185 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
6186 fputc ('\n', dump);
6190 /* We're beginning a new block. Initialize data structures as necessary. */
6192 void
6193 ix86_sched_init (dump, sched_verbose)
6194 FILE *dump ATTRIBUTE_UNUSED;
6195 int sched_verbose ATTRIBUTE_UNUSED;
6197 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
6200 /* Shift INSN to SLOT, and shift everything else down. */
6202 static void
6203 ix86_reorder_insn (insnp, slot)
6204 rtx *insnp, *slot;
6206 if (insnp != slot)
6208 rtx insn = *insnp;
6209 do
6210 insnp[0] = insnp[1];
6211 while (++insnp != slot);
6212 *insnp = insn;
6216 /* Find an instruction with the given pairability and the minimal number of
6217 cycles lost to the CPU waiting for both pipelines to finish before
6218 reading the next instructions. Also take care that the two instructions
6219 together do not exceed 7 bytes. */
6221 static rtx *
6222 ix86_pent_find_pair (e_ready, ready, type, first)
6223 rtx *e_ready;
6224 rtx *ready;
6225 enum attr_pent_pair type;
6226 rtx first;
6228 int mincycles, cycles;
6229 enum attr_pent_pair tmp;
6230 enum attr_memory memory;
6231 rtx *insnp, *bestinsnp = NULL;
6233 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
6234 return NULL;
6236 memory = ix86_safe_memory (first);
6237 cycles = result_ready_cost (first);
6238 mincycles = INT_MAX;
6240 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
6241 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
6242 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
6244 enum attr_memory second_memory;
6245 int secondcycles, currentcycles;
6247 second_memory = ix86_safe_memory (*insnp);
6248 secondcycles = result_ready_cost (*insnp);
6249 currentcycles = abs (cycles - secondcycles);
6251 if (secondcycles >= 1 && cycles >= 1)
6253 /* Two read/modify/write instructions together take two
6254 cycles longer. */
6255 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
6256 currentcycles += 2;
6258 /* A read/modify/write instruction followed by a read/modify
6259 instruction takes one cycle longer. */
6260 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
6261 && tmp != PENT_PAIR_UV
6262 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
6263 currentcycles += 1;
6265 if (currentcycles < mincycles)
6266 bestinsnp = insnp, mincycles = currentcycles;
6269 return bestinsnp;
6272 /* Subroutines of ix86_sched_reorder. */
6274 static void
6275 ix86_sched_reorder_pentium (ready, e_ready)
6276 rtx *ready;
6277 rtx *e_ready;
6279 enum attr_pent_pair pair1, pair2;
6280 rtx *insnp;
6282 /* This wouldn't be necessary if Haifa knew that static insn ordering
6283 determines which pipe an insn is issued to. So we have to make
6284 some minor rearrangements. */
6286 pair1 = ix86_safe_pent_pair (*e_ready);
6288 /* If the first insn is non-pairable, let it be. */
6289 if (pair1 == PENT_PAIR_NP)
6290 return;
6292 pair2 = PENT_PAIR_NP;
6293 insnp = 0;
6295 /* If the first insn is UV or PV pairable, search for a PU
6296 insn to go with. */
6297 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
6299 insnp = ix86_pent_find_pair (e_ready-1, ready,
6300 PENT_PAIR_PU, *e_ready);
6301 if (insnp)
6302 pair2 = PENT_PAIR_PU;
6305 /* If the first insn is PU or UV pairable, search for a PV
6306 insn to go with. */
6307 if (pair2 == PENT_PAIR_NP
6308 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
6310 insnp = ix86_pent_find_pair (e_ready-1, ready,
6311 PENT_PAIR_PV, *e_ready);
6312 if (insnp)
6313 pair2 = PENT_PAIR_PV;
6316 /* If the first insn is pairable, search for a UV
6317 insn to go with. */
6318 if (pair2 == PENT_PAIR_NP)
6320 insnp = ix86_pent_find_pair (e_ready-1, ready,
6321 PENT_PAIR_UV, *e_ready);
6322 if (insnp)
6323 pair2 = PENT_PAIR_UV;
6326 if (pair2 == PENT_PAIR_NP)
6327 return;
6329 /* Found something! Decide if we need to swap the order. */
6330 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
6331 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
6332 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
6333 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
6334 ix86_reorder_insn (insnp, e_ready);
6335 else
6336 ix86_reorder_insn (insnp, e_ready - 1);
6339 static void
6340 ix86_sched_reorder_ppro (ready, e_ready)
6341 rtx *ready;
6342 rtx *e_ready;
6344 rtx decode[3];
6345 enum attr_ppro_uops cur_uops;
6346 int issued_this_cycle;
6347 rtx *insnp;
6348 int i;
6350 /* At this point .ppro.decode contains the state of the three
6351 decoders from the last "cycle". That is, those insns that were
6352 actually independent. But here we're scheduling for the
6353 decoder, and we may find things that are decodable in the
6354 same cycle. */
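/* (The PPro front end decodes roughly in a 4-1-1 pattern: only the first
decoder handles multi-uop insns, while the other two take single-uop
insns -- hence the three decode slots and the PPRO_UOPS_ONE tests
below.) */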
6356 memcpy (decode, ix86_sched_data.ppro.decode, sizeof(decode));
6357 issued_this_cycle = 0;
6359 insnp = e_ready;
6360 cur_uops = ix86_safe_ppro_uops (*insnp);
6362 /* If the decoders are empty, and we have a complex insn at the
6363 head of the priority queue, let it issue without complaint. */
6364 if (decode[0] == NULL)
6366 if (cur_uops == PPRO_UOPS_MANY)
6368 decode[0] = *insnp;
6369 goto ppro_done;
6372 /* Otherwise, search for a 2-4 uop insn to issue. */
6373 while (cur_uops != PPRO_UOPS_FEW)
6375 if (insnp == ready)
6376 break;
6377 cur_uops = ix86_safe_ppro_uops (*--insnp);
6380 /* If so, move it to the head of the line. */
6381 if (cur_uops == PPRO_UOPS_FEW)
6382 ix86_reorder_insn (insnp, e_ready);
6384 /* Issue the head of the queue. */
6385 issued_this_cycle = 1;
6386 decode[0] = *e_ready--;
6389 /* Look for simple insns to fill in the other two slots. */
6390 for (i = 1; i < 3; ++i)
6391 if (decode[i] == NULL)
6393 if (ready >= e_ready)
6394 goto ppro_done;
6396 insnp = e_ready;
6397 cur_uops = ix86_safe_ppro_uops (*insnp);
6398 while (cur_uops != PPRO_UOPS_ONE)
6400 if (insnp == ready)
6401 break;
6402 cur_uops = ix86_safe_ppro_uops (*--insnp);
6405 /* Found one. Move it to the head of the queue and issue it. */
6406 if (cur_uops == PPRO_UOPS_ONE)
6408 ix86_reorder_insn (insnp, e_ready);
6409 decode[i] = *e_ready--;
6410 issued_this_cycle++;
6411 continue;
6414 /* ??? Didn't find one. Ideally, here we would do a lazy split
6415 of 2-uop insns, issue one and queue the other. */
6418 ppro_done:
6419 if (issued_this_cycle == 0)
6420 issued_this_cycle = 1;
6421 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
6425 /* We are about to begin issuing insns for this clock cycle.
6426 Override the default sort algorithm to better slot instructions. */
6428 ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
6429 FILE *dump ATTRIBUTE_UNUSED;
6430 int sched_verbose ATTRIBUTE_UNUSED;
6431 rtx *ready;
6432 int n_ready;
6433 int clock_var ATTRIBUTE_UNUSED;
6435 rtx *e_ready = ready + n_ready - 1;
6437 if (n_ready < 2)
6438 goto out;
6440 switch (ix86_cpu)
6442 default:
6443 break;
6445 case PROCESSOR_PENTIUM:
6446 ix86_sched_reorder_pentium (ready, e_ready);
6447 break;
6449 case PROCESSOR_PENTIUMPRO:
6450 ix86_sched_reorder_ppro (ready, e_ready);
6451 break;
6454 out:
6455 return ix86_issue_rate ();
6458 /* We are about to issue INSN. Return the number of insns left on the
6459 ready queue that can be issued this cycle. */
6462 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
6463 FILE *dump;
6464 int sched_verbose;
6465 rtx insn;
6466 int can_issue_more;
6468 int i;
6469 switch (ix86_cpu)
6471 default:
6472 return can_issue_more - 1;
6474 case PROCESSOR_PENTIUMPRO:
6476 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
6478 if (uops == PPRO_UOPS_MANY)
6480 if (sched_verbose)
6481 ix86_dump_ppro_packet (dump);
6482 ix86_sched_data.ppro.decode[0] = insn;
6483 ix86_sched_data.ppro.decode[1] = NULL;
6484 ix86_sched_data.ppro.decode[2] = NULL;
6485 if (sched_verbose)
6486 ix86_dump_ppro_packet (dump);
6487 ix86_sched_data.ppro.decode[0] = NULL;
6489 else if (uops == PPRO_UOPS_FEW)
6491 if (sched_verbose)
6492 ix86_dump_ppro_packet (dump);
6493 ix86_sched_data.ppro.decode[0] = insn;
6494 ix86_sched_data.ppro.decode[1] = NULL;
6495 ix86_sched_data.ppro.decode[2] = NULL;
6497 else
6499 for (i = 0; i < 3; ++i)
6500 if (ix86_sched_data.ppro.decode[i] == NULL)
6502 ix86_sched_data.ppro.decode[i] = insn;
6503 break;
6505 if (i == 3)
6506 abort ();
6507 if (i == 2)
6509 if (sched_verbose)
6510 ix86_dump_ppro_packet (dump);
6511 ix86_sched_data.ppro.decode[0] = NULL;
6512 ix86_sched_data.ppro.decode[1] = NULL;
6513 ix86_sched_data.ppro.decode[2] = NULL;
6517 return --ix86_sched_data.ppro.issued_this_cycle;