/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
   Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include <setjmp.h>
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif
/* Processor costs (relative to an add).  */
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  6,			/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  23,			/* cost of a divide/mod */
  15,			/* "large" insn */
  3,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};

struct processor_costs i486_cost = {	/* 486 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  12,			/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  40,			/* cost of a divide/mod */
  15,			/* "large" insn */
  3,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};

struct processor_costs pentium_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  4,			/* variable shift costs */
  1,			/* constant shift costs */
  11,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  25,			/* cost of a divide/mod */
  8,			/* "large" insn */
  6,			/* MOVE_RATIO */
  6,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};

struct processor_costs pentiumpro_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  4,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  17,			/* cost of a divide/mod */
  8,			/* "large" insn */
  6,			/* MOVE_RATIO */
  2,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};

struct processor_costs k6_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  3,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  18,			/* cost of a divide/mod */
  8,			/* "large" insn */
  4,			/* MOVE_RATIO */
  3,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};

struct processor_costs athlon_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  5,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  42,			/* cost of a divide/mod */
  8,			/* "large" insn */
  9,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 20},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 16}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
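/* The cost table currently in effect.  override_options below re-points
   this at the table matching the CPU selected with -mcpu=; the Pentium
   table is only a safe default for code that runs before the options
   have been processed.  */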
struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
const int x86_use_any_reg = m_486;
const int x86_cmove = m_PPRO | m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
const int x86_use_sahf = m_PPRO | m_K6;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
const int x86_add_esp_4 = m_ATHLON | m_K6;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
const int x86_integer_DFmode_moves = ~m_ATHLON;
const int x86_partial_reg_dependency = m_ATHLON;
const int x86_memory_mismatch_stall = m_ATHLON;
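/* Each mask above is tested against the bit for the CPU we are
   currently scheduling for.  An illustrative sketch of how i386.h
   consumes one of these (the real macro names and spellings live in
   i386.h, not here):

       #define TARGET_USE_LEAVE (x86_use_leave & (1 << (int) ix86_cpu))
*/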
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))

const char * const hi_reg_name[] = HI_REGISTER_NAMES;
const char * const qi_reg_name[] = QI_REGISTER_NAMES;
const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS
};

/* The "default" register map.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1,			/* arg, flags, fpsr, dir */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to
   understand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
 */

int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1,			/* arg, flags, fpsr, dir */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 2

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
};

#define ix86_stack_locals (cfun->machine->stack_locals)

/* Which cpu we are scheduling for.  */
enum processor_type ix86_cpu;

/* Which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */

/* Register allocation order.  */
const char *ix86_reg_alloc_order;
static char regs_allocated[FIRST_PSEUDO_REGISTER];

/* Number of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* ix86_regparm_string as a number.  */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c.  */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
int ix86_align_funcs;
const char *ix86_align_funcs_string;

/* Power of two alignment for loops.  */
int ix86_align_loops;

/* Power of two alignment for non-loop jumps.  */
int ix86_align_jumps;
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					  rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_free_machine_status PARAMS ((struct function *));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static HOST_WIDE_INT ix86_compute_frame_size PARAMS ((HOST_WIDE_INT,
						      int *, int *, int *));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
static void ix86_emit_epilogue_esp_adjustment PARAMS ((int));
static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
					 rtx));
static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
					    rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					   rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      struct processor_costs *cost;	/* Processor costs */
      int target_enable;		/* Target flags to enable.  */
      int target_disable;		/* Target flags to disable.  */
      int align_loop;			/* Default alignments.  */
      int align_jump;
      int align_func;
      int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 2, 2, 2, 1},
      {&i486_cost, 0, 0, 4, 4, 4, 1},
      {&pentium_cost, 0, 0, -4, -4, -4, 1},
      {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
      {&k6_cost, 0, 0, -5, -5, 4, 1},
      {&athlon_cost, 0, 0, 4, -4, 4, 1}
    };

  static struct pta
    {
      const char *name;		/* processor name or nickname.  */
      enum processor_type processor;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386},
      {"i486", PROCESSOR_I486},
      {"i586", PROCESSOR_PENTIUM},
      {"pentium", PROCESSOR_PENTIUM},
      {"i686", PROCESSOR_PENTIUMPRO},
      {"pentiumpro", PROCESSOR_PENTIUMPRO},
      {"k6", PROCESSOR_K6},
      {"athlon", PROCESSOR_ATHLON},
    };

  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  ix86_arch = PROCESSOR_I386;
  ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;

  if (ix86_arch_string != 0)
    {
      int i;
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
	  {
	    ix86_arch = processor_alias_table[i].processor;
	    /* Default cpu tuning to the architecture.  */
	    ix86_cpu = ix86_arch;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -march= switch", ix86_arch_string);
    }

  if (ix86_cpu_string != 0)
    {
      int i;
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
	  {
	    ix86_cpu = processor_alias_table[i].processor;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
    }

  ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;
  free_machine_status = ix86_free_machine_status;

  /* Validate registers in register allocation order.  */
  if (ix86_reg_alloc_order)
    {
      int i, ch;
      for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a':	regno = 0;	break;
	    case 'd':	regno = 1;	break;
	    case 'c':	regno = 2;	break;
	    case 'b':	regno = 3;	break;
	    case 'S':	regno = 4;	break;
	    case 'D':	regno = 5;	break;
	    case 'B':	regno = 6;	break;

	    default:	fatal ("Register '%c' is unknown", ch);
	    }

	  if (regs_allocated[regno])
	    fatal ("Register '%c' already specified in allocation order", ch);

	  regs_allocated[regno] = 1;
	}
    }

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      ix86_regparm = atoi (ix86_regparm_string);
      if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
	fatal ("-mregparm=%d is not between 0 and %d",
	       ix86_regparm, REGPARM_MAX);
    }

  /* Validate -malign-loops= value, or provide default.  */
  ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
  if (ix86_align_loops_string)
    {
      ix86_align_loops = atoi (ix86_align_loops_string);
      if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
	fatal ("-malign-loops=%d is not between 0 and %d",
	       ix86_align_loops, MAX_CODE_ALIGN);
    }

  /* Validate -malign-jumps= value, or provide default.  */
  ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
  if (ix86_align_jumps_string)
    {
      ix86_align_jumps = atoi (ix86_align_jumps_string);
      if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
	fatal ("-malign-jumps=%d is not between 0 and %d",
	       ix86_align_jumps, MAX_CODE_ALIGN);
    }

  /* Validate -malign-functions= value, or provide default.  */
  ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
  if (ix86_align_funcs_string)
    {
      ix86_align_funcs = atoi (ix86_align_funcs_string);
      if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
	fatal ("-malign-functions=%d is not between 0 and %d",
	       ix86_align_funcs, MAX_CODE_ALIGN);
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128.  */
  ix86_preferred_stack_boundary = 128;
  if (ix86_preferred_stack_boundary_string)
    {
      int i = atoi (ix86_preferred_stack_boundary_string);
      if (i < 2 || i > 31)
	fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
      ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      ix86_branch_cost = atoi (ix86_branch_cost_string);
      if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
	fatal ("-mbranch-cost=%d is not between 0 and 5",
	       ix86_branch_cost);
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_fast_math)
    target_flags &= ~MASK_IEEE_FP;

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    target_flags |= MASK_MMX;
}
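/* Note the interaction above: -march= chooses the instruction set
   (ix86_arch) and, unless -mcpu= is also given, the scheduling target
   (ix86_cpu) as well.  E.g. "-march=i386 -mcpu=athlon" emits only 386
   instructions but schedules and costs them for an Athlon.  */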
/* A C statement (sans semicolon) to choose the order in which to
   allocate hard registers for pseudo-registers local to a basic
   block.

   Store the desired register order in the array `reg_alloc_order'.
   Element 0 should be the register to allocate first; element 1, the
   next register; and so on.

   The macro body should not assume anything about the contents of
   `reg_alloc_order' before execution of the macro.

   On most machines, it is not necessary to define this macro.  */

void
order_regs_for_local_alloc ()
{
  int i, ch, order;

  /* User specified the register allocation order.  */

  if (ix86_reg_alloc_order)
    {
      for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a':	regno = 0;	break;
	    case 'd':	regno = 1;	break;
	    case 'c':	regno = 2;	break;
	    case 'b':	regno = 3;	break;
	    case 'S':	regno = 4;	break;
	    case 'D':	regno = 5;	break;
	    case 'B':	regno = 6;	break;
	    }

	  reg_alloc_order[order++] = regno;
	}

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	{
	  if (! regs_allocated[i])
	    reg_alloc_order[order++] = i;
	}
    }

  /* If user did not specify a register allocation order, use natural order.  */
  else
    {
      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	reg_alloc_order[i] = i;
    }
}
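/* For example, an allocation-order string of "adb" makes local-alloc
   prefer %eax, %edx and %ebx (regnos 0, 1 and 3) in that order, with
   every register not mentioned following in its natural order via the
   regs_allocated scan above.  */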
void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
}
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for DECL.  The attributes in ATTRIBUTES have previously been
   assigned to DECL.  */

int
ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
     tree decl ATTRIBUTE_UNUSED;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier ATTRIBUTE_UNUSED;
     tree args ATTRIBUTE_UNUSED;
{
  return 0;
}

/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for TYPE.  The attributes in ATTRIBUTES have previously been
   assigned to TYPE.  */

int
ix86_valid_type_attribute_p (type, attributes, identifier, args)
     tree type;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier;
     tree args;
{
  if (TREE_CODE (type) != FUNCTION_TYPE
      && TREE_CODE (type) != METHOD_TYPE
      && TREE_CODE (type) != FIELD_DECL
      && TREE_CODE (type) != TYPE_DECL)
    return 0;

  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  if (is_attribute_p ("stdcall", identifier))
    return (args == NULL_TREE);

  /* Cdecl attribute says the callee is a normal C declaration.  */
  if (is_attribute_p ("cdecl", identifier))
    return (args == NULL_TREE);

  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  if (is_attribute_p ("regparm", identifier))
    {
      tree cst;

      if (! args || TREE_CODE (args) != TREE_LIST
	  || TREE_CHAIN (args) != NULL_TREE
	  || TREE_VALUE (args) == NULL_TREE)
	return 0;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	return 0;

      if (compare_tree_int (cst, REGPARM_MAX) > 0)
	return 0;

      return 1;
    }

  return 0;
}
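/* For example, a declaration such as

       int add3 (int, int, int) __attribute__ ((regparm (3)));

   passes the regparm validation above: ARGS is a single TREE_LIST
   node whose value is the INTEGER_CST 3, which does not exceed
   REGPARM_MAX.  */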
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}

/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
    {
      /* Stdcall functions will pop the stack if not variable args.  */
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
	rtd = 1;

      if (rtd
	  && (TYPE_ARG_TYPES (funtype) == NULL_TREE
	      || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		  == void_type_node)))
	return size;
    }

  /* Lose any fake structure return argument.  */
  if (aggregate_value_p (TREE_TYPE (funtype)))
    return GET_MODE_SIZE (Pmode);

  return 0;
}
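/* For example, given

       void f (int, int) __attribute__ ((stdcall));

   the argument list is fixed and SIZE is 8, so we return 8 and the
   callee pops its own arguments on return (a "ret $8" on x86).  */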
/* Argument support functions.  */

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (fntype)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being `void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers.  */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    cum->nregs = 0;
	}
    }

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* whether or not the argument was named */
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

  cum->words += words;
  cum->nregs -= words;
  cum->regno += words;

  if (cum->nregs <= 0)
    {
      cum->nregs = 0;
      cum->regno = 0;
    }

  return;
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

struct rtx_def *
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  switch (mode)
    {
      /* For now, pass fp/complex values on the stack.  */
    default:
      break;

    case BLKmode:
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	ret = gen_rtx_REG (mode, cum->regno);
      break;
    }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
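/* Note the register walk above: cum->regno advances 0, 1, 2, so a
   regparm (3) function receives its first three word-sized arguments
   in %eax, %edx and %ecx (gcc regnos 0, 1 and 2) and everything after
   that on the stack.  */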
/* Return nonzero if OP is (const_int 1), else return zero.  */

int
const_int_1_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
}

/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && XINT (op, 1) >= 6
	      && XINT (op, 1) <= 7))
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != 7)
	return 0;

      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}
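/* Note on the magic numbers above: unspec 7 marks @GOTOFF references
   (per the comment in symbolic_operand), and unspec 6 is evidently the
   matching @GOT form; the definitive numbering lives in i386.md.  */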
/* Return true if the operand contains a @GOT or @GOTOFF reference.  */

int
pic_symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == CONST)
    {
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
    }
  return 0;
}

/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* Disallow indirection through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Half-pic doesn't allow anything but registers and constants.
     We've just taken care of the latter.  */
  if (HALF_PIC_P ())
    return register_operand (op, Pmode);

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}

int
constant_call_address_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);
  return GET_CODE (op) == SYMBOL_REF;
}
/* Match exactly zero and one.  */

int
const0_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return op == CONST0_RTX (mode);
}

int
const1_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return op == const1_rtx;
}

/* Match 2, 4, or 8.  Used for leal multiplicands.  */

int
const248_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
}

/* True if this is a constant appropriate for an increment or decrement.  */

int
incdec_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (op == const1_rtx || op == constm1_rtx)
    return 1;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
    return 1;
  if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
    return 1;
  if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
    return 1;
  return 0;
}
1181 register eliminable to the stack pointer. Otherwise, this is
1182 a register operand.
1184 This is used to prevent esp from being used as an index reg.
1185 Which would only happen in pathological cases. */
1188 reg_no_sp_operand (op, mode)
1189 register rtx op;
1190 enum machine_mode mode;
1192 rtx t = op;
1193 if (GET_CODE (t) == SUBREG)
1194 t = SUBREG_REG (t);
1195 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
1196 return 0;
1198 return register_operand (op, mode);
1202 mmx_reg_operand (op, mode)
1203 register rtx op;
1204 enum machine_mode mode ATTRIBUTE_UNUSED;
1206 return MMX_REG_P (op);
1209 /* Return false if this is any eliminable register. Otherwise
1210 general_operand. */
1213 general_no_elim_operand (op, mode)
1214 register rtx op;
1215 enum machine_mode mode;
1217 rtx t = op;
1218 if (GET_CODE (t) == SUBREG)
1219 t = SUBREG_REG (t);
1220 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1221 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1222 || t == virtual_stack_dynamic_rtx)
1223 return 0;
1225 return general_operand (op, mode);
1228 /* Return false if this is any eliminable register. Otherwise
1229 register_operand or const_int. */
1232 nonmemory_no_elim_operand (op, mode)
1233 register rtx op;
1234 enum machine_mode mode;
1236 rtx t = op;
1237 if (GET_CODE (t) == SUBREG)
1238 t = SUBREG_REG (t);
1239 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1240 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1241 || t == virtual_stack_dynamic_rtx)
1242 return 0;
1244 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1247 /* Return true if op is a Q_REGS class register. */
1250 q_regs_operand (op, mode)
1251 register rtx op;
1252 enum machine_mode mode;
1254 if (mode != VOIDmode && GET_MODE (op) != mode)
1255 return 0;
1256 if (GET_CODE (op) == SUBREG)
1257 op = SUBREG_REG (op);
1258 return QI_REG_P (op);
1261 /* Return true if op is a NON_Q_REGS class register. */
1264 non_q_regs_operand (op, mode)
1265 register rtx op;
1266 enum machine_mode mode;
1268 if (mode != VOIDmode && GET_MODE (op) != mode)
1269 return 0;
1270 if (GET_CODE (op) == SUBREG)
1271 op = SUBREG_REG (op);
1272 return NON_QI_REG_P (op);
/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
   insns.  */

int
sse_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  enum rtx_code code = GET_CODE (op);
  return code == EQ || code == LT || code == LE || code == UNORDERED;
}

/* Return 1 if OP is a valid comparison operator in a valid mode.  */

int
ix86_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));

  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      return (bypass_code == NIL && second_code == NIL);
    }
  switch (code)
    {
    case EQ: case NE:
      return 1;
    case LT: case GE:
      if (inmode == CCmode || inmode == CCGCmode
	  || inmode == CCGOCmode || inmode == CCNOmode)
	return 1;
      return 0;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      if (inmode == CCmode)
	return 1;
      return 0;
    case GT: case LE:
      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}

/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));
  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	return 0;
      code = ix86_fp_compare_code_to_integer (code);
    }
  /* The i387 supports only a limited set of condition codes.  */
  switch (code)
    {
    case LTU: case GTU: case LEU: case GEU:
      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
	return 1;
      return 0;
    case ORDERED: case UNORDERED:
    case EQ: case NE:
      return 1;
    default:
      return 0;
    }
}
/* Return 1 if OP is a binary operator that can be promoted to wider mode.  */

int
promotable_binary_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case MULT:
      /* Modern CPUs have the same latency for HImode and SImode
	 multiply, but the 386 and 486 do HImode multiply faster.  */
      return ix86_cpu > PROCESSOR_I486;
    case PLUS:
    case AND:
    case IOR:
    case XOR:
    case ASHIFT:
      return 1;
    default:
      return 0;
    }
}

/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */

int
cmp_fp_expander_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (GET_CODE (op) == CONST_DOUBLE)
    return 1;
  return general_operand (op, mode);
}

/* Match an SImode or HImode register for a zero_extract.  */

int
ext_register_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;
  return register_operand (op, VOIDmode);
}
/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}

int
mult_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == MULT;
}

int
div_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == DIV;
}

int
arith_or_logical_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
	      || GET_RTX_CLASS (GET_CODE (op)) == '2'));
}
/* Returns 1 if OP is a memory operand with a displacement.  */

int
memory_displacement_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}

/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (general_operand (op, mode))
    return 1;

  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}

/* Returns 1 if OP is a memory operand that cannot be represented by the
   modRM array.  */

int
long_memory_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}

/* Return nonzero if the rtx is known aligned.  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE)
    return -1;

#if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
  {
    REAL_VALUE_TYPE d;
    jmp_buf handler;
    int is0, is1;

    if (setjmp (handler))
      return 0;

    set_float_handler (handler);
    REAL_VALUE_FROM_CONST_DOUBLE (d, x);
    is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
    is1 = REAL_VALUES_EQUAL (d, dconst1);
    set_float_handler (NULL_PTR);

    if (is0)
      return 1;

    if (is1)
      return 2;

    /* Note that on the 80387, other constants, such as pi,
       are much slower to load as standard constants
       than to load from doubles in memory!  */
    /* ??? Not true on K6: all constants are equal cost.  */
  }
#endif

  return 0;
}
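/* The return values are keyed to the x87 constant-load instructions:
   1 means the value can be loaded with fldz (+0.0) and 2 with fld1
   (1.0); -1 flags a non-CONST_DOUBLE and 0 means no special
   instruction applies.  */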
/* Returns 1 if OP contains a symbol reference.  */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  HOST_WIDE_INT tsize;
  int nregs;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif
#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow popping 32768 bytes or more of args, since that's all
     we can do with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  tsize = ix86_compute_frame_size (get_frame_size (), &nregs, NULL, NULL);
  return tsize == 0 && nregs == 0;
}
static char pic_label_name[32];

/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

void
ix86_asm_file_end (file)
     FILE *file;
{
  rtx xops[2];

  if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
    return;

  /* ??? Binutils 2.10 and earlier have a linkonce elimination bug related
     to updating relocations to a section being discarded such that this
     doesn't work.  Ought to detect this at configure time.  */
#if 0 && defined (ASM_OUTPUT_SECTION_NAME)
  /* The trick here is to create a linkonce section containing the
     pic label thunk, but to refer to it with an internal label.
     Because the label is internal, we don't have inter-dso name
     binding issues on hosts that don't support ".hidden".

     In order to use these macros, however, we must create a fake
     function decl.  */
  {
    tree decl = build_decl (FUNCTION_DECL,
			    get_identifier ("i686.get_pc_thunk"),
			    error_mark_node);
    DECL_ONE_ONLY (decl) = 1;
    UNIQUE_SECTION (decl, 0);
    named_section (decl, NULL, 0);
  }
#else
  text_section ();
#endif

  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
     internal (non-global) label that's being emitted, it didn't make
     sense to have .type information for local labels.  This caused
     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
     me debug info for a label that you're declaring non-global?), so
     this was changed to call ASM_OUTPUT_LABEL() instead.  */

  ASM_OUTPUT_LABEL (file, pic_label_name);

  xops[0] = pic_offset_table_rtx;
  xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
  output_asm_insn ("ret", xops);
}
void
load_pic_register ()
{
  rtx gotsym, pclab;

  gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");

  if (TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (! pic_label_name[0])
	ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
    }
  else
    {
      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
    }

  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));

  if (! TARGET_DEEP_BRANCH_PREDICTION)
    emit_insn (gen_popsi1 (pic_offset_table_rtx));

  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
}
/* Generate an SImode "push" pattern for input ARG.  */

static rtx
gen_push (arg)
     rtx arg;
{
  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (SImode,
				   gen_rtx_PRE_DEC (SImode,
						    stack_pointer_rtx)),
		      arg);
}
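/* The pattern built above is literally

       (set (mem:SI (pre_dec:SI (reg:SI sp))) arg)

   which the move patterns in i386.md output as a "push" instruction.  */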
/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs ()
{
  int nregs = 0;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int limit = (frame_pointer_needed
	       ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
  int regno;

  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && ! call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      nregs++;

  return nregs;
}

/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;
     int to;
{
  int padding1;
  int nregs;

  /* Stack grows downward:

     [arguments]
						<- ARG_POINTER
     saved pc

     saved frame pointer if frame_pointer_needed
						<- HARD_FRAME_POINTER
     [saved regs]

     [padding1]   \
		   |			<- FRAME_POINTER
     [frame]	    > tsize
		   |
     [padding2]   /
   */

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    /* Skip saved PC and previous frame pointer.
       Executed only when frame_pointer_needed.  */
    return 8;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    {
      ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *) 0);
      padding1 += nregs * UNITS_PER_WORD;
      return -padding1;
    }
  else
    {
      /* ARG_POINTER or FRAME_POINTER to STACK_POINTER elimination.  */
      int frame_size = frame_pointer_needed ? 8 : 4;
      HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (),
						     &nregs, &padding1, (int *) 0);

      if (to != STACK_POINTER_REGNUM)
	abort ();
      else if (from == ARG_POINTER_REGNUM)
	return tsize + nregs * UNITS_PER_WORD + frame_size;
      else if (from != FRAME_POINTER_REGNUM)
	abort ();
      else
	return tsize - padding1;
    }
}
1861 /* Compute the size of local storage taking into consideration the
1862 desired stack alignment which is to be maintained. Also determine
1863 the number of registers saved below the local storage.
1865 PADDING1 returns padding before stack frame and PADDING2 returns
1866 padding after stack frame;
1869 static HOST_WIDE_INT
1870 ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2)
1871 HOST_WIDE_INT size;
1872 int *nregs_on_stack;
1873 int *rpadding1;
1874 int *rpadding2;
1876 int nregs;
1877 int padding1 = 0;
1878 int padding2 = 0;
1879 HOST_WIDE_INT total_size;
1880 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
1881 int offset;
1882 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
1884 nregs = ix86_nsaved_regs ();
1885 total_size = size;
1887 offset = frame_pointer_needed ? 8 : 4;
1889 /* Do some sanity checking of stack_alignment_needed and preferred_alignment,
1890 since i386 port is the only using those features that may break easilly. */
1892 if (size && !stack_alignment_needed)
1893 abort ();
1894 if (!size && stack_alignment_needed != STACK_BOUNDARY / BITS_PER_UNIT)
1895 abort ();
1896 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
1897 abort ();
1898 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1899 abort ();
1900 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1901 abort ();
1903 if (stack_alignment_needed < 4)
1904 stack_alignment_needed = 4;
1906 offset += nregs * UNITS_PER_WORD;
1908 if (ACCUMULATE_OUTGOING_ARGS)
1909 total_size += current_function_outgoing_args_size;
1911 total_size += offset;
1913 /* Align start of frame for local function. */
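  /* The idiom ((X + A - 1) & -A) rounds X up to the next multiple of A
     when A is a power of two; e.g. ((20 + 15) & -16) == 32, giving 12
     bytes of padding for offset == 20 and 16-byte alignment.  */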
1914 padding1 = ((offset + stack_alignment_needed - 1)
1915 & -stack_alignment_needed) - offset;
1916 total_size += padding1;
1918 /* Align stack boundary. */
1919 padding2 = ((total_size + preferred_alignment - 1)
1920 & -preferred_alignment) - total_size;
1922 if (ACCUMULATE_OUTGOING_ARGS)
1923 padding2 += current_function_outgoing_args_size;
1925 if (nregs_on_stack)
1926 *nregs_on_stack = nregs;
1927 if (rpadding1)
1928 *rpadding1 = padding1;
1929 if (rpadding2)
1930 *rpadding2 = padding2;
1932 return size + padding1 + padding2;
1935 /* Emit code to save registers in the prologue. */
1937 static void
1938 ix86_emit_save_regs ()
1940 register int regno;
1941 int limit;
1942 rtx insn;
1943 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1944 || current_function_uses_const_pool);
1945 limit = (frame_pointer_needed
1946 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
1948 for (regno = limit - 1; regno >= 0; regno--)
1949 if ((regs_ever_live[regno] && !call_used_regs[regno])
1950 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1952 insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
1953 RTX_FRAME_RELATED_P (insn) = 1;
1957 /* Expand the prologue into a bunch of separate insns. */
1959 void
1960 ix86_expand_prologue ()
1962 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *) 0,
1963 (int *) 0, (int *) 0);
1964 rtx insn;
1965 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1966 || current_function_uses_const_pool);
1968 /* Note: AT&T enter does NOT have reversed args. Enter is probably
1969 slower on all targets. Also sdb doesn't like it. */
1971 if (frame_pointer_needed)
1973 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
1974 RTX_FRAME_RELATED_P (insn) = 1;
1976 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
1977 RTX_FRAME_RELATED_P (insn) = 1;
1980 ix86_emit_save_regs ();
1982 if (tsize == 0)
1984 else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
1986 if (frame_pointer_needed)
1987 insn = emit_insn (gen_pro_epilogue_adjust_stack
1988 (stack_pointer_rtx, stack_pointer_rtx,
1989 GEN_INT (-tsize), hard_frame_pointer_rtx));
1990 else
1991 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
1992 GEN_INT (-tsize)));
1993 RTX_FRAME_RELATED_P (insn) = 1;
1995 else
1997 /* ??? Is this only valid for Win32? */
1999 rtx arg0, sym;
2001 arg0 = gen_rtx_REG (SImode, 0);
2002 emit_move_insn (arg0, GEN_INT (tsize));
2004 sym = gen_rtx_MEM (FUNCTION_MODE,
2005 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
2006 insn = emit_call_insn (gen_call (sym, const0_rtx));
2008 CALL_INSN_FUNCTION_USAGE (insn)
2009 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
2010 CALL_INSN_FUNCTION_USAGE (insn));
2013 #ifdef SUBTARGET_PROLOGUE
2014 SUBTARGET_PROLOGUE;
2015 #endif
2017 if (pic_reg_used)
2018 load_pic_register ();
2020 /* If we are profiling, make sure no instructions are scheduled before
2021 the call to mcount. However, if -fpic, the above call will have
2022 done that. */
2023 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
2024 emit_insn (gen_blockage ());
2027 /* Emit code to add TSIZE to the esp value. Use a POP instruction when
2028 profitable. */
2030 static void
2031 ix86_emit_epilogue_esp_adjustment (tsize)
2032 int tsize;
2034 /* If a frame pointer is present, we must be sure to tie the sp
2035 to the fp so that we don't mis-schedule. */
2036 if (frame_pointer_needed)
2037 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2038 stack_pointer_rtx,
2039 GEN_INT (tsize),
2040 hard_frame_pointer_rtx));
2041 else
2042 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2043 GEN_INT (tsize)));
2046 /* Emit code to restore saved registers using MOV insns. First register
2047 is restored from POINTER + OFFSET. */
2048 static void
2049 ix86_emit_restore_regs_using_mov (pointer, offset)
2050 rtx pointer;
2051 int offset;
2053 int regno;
2054 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2055 || current_function_uses_const_pool);
2056 int limit = (frame_pointer_needed
2057 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
2059 for (regno = 0; regno < limit; regno++)
2060 if ((regs_ever_live[regno] && !call_used_regs[regno])
2061 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2063 emit_move_insn (gen_rtx_REG (SImode, regno),
2064 adj_offsettable_operand (gen_rtx_MEM (SImode,
2065 pointer),
2066 offset));
2067 offset += 4;
2071 /* Restore function stack, frame, and registers. */
2073 void
2074 ix86_expand_epilogue (emit_return)
2075 int emit_return;
2077 int nregs;
2078 int regno;
2080 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2081 || current_function_uses_const_pool);
2082 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
2083 HOST_WIDE_INT offset;
2084 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
2085 (int *) 0, (int *) 0);
2087 /* Calculate start of saved registers relative to ebp. */
2088 offset = -nregs * UNITS_PER_WORD;
2090 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
2091 if (profile_block_flag == 2)
2093 FUNCTION_BLOCK_PROFILER_EXIT;
2095 #endif
2097 /* If we're only restoring one register and sp is not valid, then
2098 use a move instruction to restore the register, since it's
2099 less work than reloading sp and popping the register.
2101 The default code results in a stack adjustment using an add/lea instruction,
2102 while this code results in a LEAVE instruction (or discrete equivalent),
2103 so it is profitable in some other cases as well, especially when there
2104 are no registers to restore. We also use this code when TARGET_USE_LEAVE
2105 and there is exactly one register to pop. This heuristic may need some
2106 tuning in the future. */
2107 if ((!sp_valid && nregs <= 1)
2108 || (frame_pointer_needed && !nregs && tsize)
2109 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
2110 && nregs == 1))
2112 /* Restore registers. We can use ebp or esp to address the memory
2113 locations. If both are available, default to ebp, since offsets
2114 are known to be small. The only exception is esp pointing directly
2115 to the end of the block of saved registers, where we may simplify
2116 the addressing mode. */
2118 if (!frame_pointer_needed || (sp_valid && !tsize))
2119 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, tsize);
2120 else
2121 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);
2123 if (!frame_pointer_needed)
2124 ix86_emit_epilogue_esp_adjustment (tsize + nregs * UNITS_PER_WORD);
2125 /* If not an i386, mov & pop is faster than "leave". */
2126 else if (TARGET_USE_LEAVE || optimize_size)
2127 emit_insn (gen_leave ());
2128 else
2130 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2131 hard_frame_pointer_rtx,
2132 const0_rtx,
2133 hard_frame_pointer_rtx));
2134 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2137 else
2139 /* First step is to deallocate the stack frame so that we can
2140 pop the registers. */
2141 if (!sp_valid)
2143 if (!frame_pointer_needed)
2144 abort ();
2145 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2146 hard_frame_pointer_rtx,
2147 GEN_INT (offset),
2148 hard_frame_pointer_rtx));
2150 else if (tsize)
2151 ix86_emit_epilogue_esp_adjustment (tsize);
2153 for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
2154 if ((regs_ever_live[regno] && !call_used_regs[regno])
2155 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2156 emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
2159 /* Sibcall epilogues don't want a return instruction. */
2160 if (! emit_return)
2161 return;
2163 if (current_function_pops_args && current_function_args_size)
2165 rtx popc = GEN_INT (current_function_pops_args);
2167 /* i386 can only pop 64K bytes. If asked to pop more, pop
2168 return address, do explicit add, and jump indirectly to the
2169 caller. */
2171 if (current_function_pops_args >= 65536)
2173 rtx ecx = gen_rtx_REG (SImode, 2);
2175 emit_insn (gen_popsi1 (ecx));
2176 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
2177 emit_jump_insn (gen_return_indirect_internal (ecx));
2179 else
2180 emit_jump_insn (gen_return_pop_internal (popc));
2182 else
2183 emit_jump_insn (gen_return_internal ());
2186 /* Extract the parts of an RTL expression that is a valid memory address
2187 for an instruction. Return false if the structure of the address is
2188 grossly off. */
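/* An illustrative example (hypothetical registers): the address
   (plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 8)),
   i.e. 8(%eax,%ebx,4) with A = %eax and B = %ebx, decomposes into
   base = A, index = B, scale = 4 and disp = 8 via the PLUS/MULT
   cases below.  */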
2190 static int
2191 ix86_decompose_address (addr, out)
2192 register rtx addr;
2193 struct ix86_address *out;
2195 rtx base = NULL_RTX;
2196 rtx index = NULL_RTX;
2197 rtx disp = NULL_RTX;
2198 HOST_WIDE_INT scale = 1;
2199 rtx scale_rtx = NULL_RTX;
2201 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2202 base = addr;
2203 else if (GET_CODE (addr) == PLUS)
2205 rtx op0 = XEXP (addr, 0);
2206 rtx op1 = XEXP (addr, 1);
2207 enum rtx_code code0 = GET_CODE (op0);
2208 enum rtx_code code1 = GET_CODE (op1);
2210 if (code0 == REG || code0 == SUBREG)
2212 if (code1 == REG || code1 == SUBREG)
2213 index = op0, base = op1; /* index + base */
2214 else
2215 base = op0, disp = op1; /* base + displacement */
2217 else if (code0 == MULT)
2219 index = XEXP (op0, 0);
2220 scale_rtx = XEXP (op0, 1);
2221 if (code1 == REG || code1 == SUBREG)
2222 base = op1; /* index*scale + base */
2223 else
2224 disp = op1; /* index*scale + disp */
2226 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2228 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2229 scale_rtx = XEXP (XEXP (op0, 0), 1);
2230 base = XEXP (op0, 1);
2231 disp = op1;
2233 else if (code0 == PLUS)
2235 index = XEXP (op0, 0); /* index + base + disp */
2236 base = XEXP (op0, 1);
2237 disp = op1;
2239 else
2240 return FALSE;
2242 else if (GET_CODE (addr) == MULT)
2244 index = XEXP (addr, 0); /* index*scale */
2245 scale_rtx = XEXP (addr, 1);
2247 else if (GET_CODE (addr) == ASHIFT)
2249 rtx tmp;
2251 /* We're called for lea too, which implements ashift on occasion. */
2252 index = XEXP (addr, 0);
2253 tmp = XEXP (addr, 1);
2254 if (GET_CODE (tmp) != CONST_INT)
2255 return FALSE;
2256 scale = INTVAL (tmp);
2257 if ((unsigned HOST_WIDE_INT) scale > 3)
2258 return FALSE;
2259 scale = 1 << scale;
2261 else
2262 disp = addr; /* displacement */
2264 /* Extract the integral value of scale. */
2265 if (scale_rtx)
2267 if (GET_CODE (scale_rtx) != CONST_INT)
2268 return FALSE;
2269 scale = INTVAL (scale_rtx);
2272 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
2273 if (base && index && scale == 1
2274 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2275 || index == stack_pointer_rtx))
2277 rtx tmp = base;
2278 base = index;
2279 index = tmp;
2282 /* Special case: %ebp cannot be encoded as a base without a displacement. */
2283 if ((base == hard_frame_pointer_rtx
2284 || base == frame_pointer_rtx
2285 || base == arg_pointer_rtx) && !disp)
2286 disp = const0_rtx;
2288 /* Special case: on K6, [%esi] makes the instruction vector decoded.
2289 Avoid this by transforming to [%esi+0]. */
2290 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
2291 && base && !index && !disp
2292 && REG_P (base)
2293 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
2294 disp = const0_rtx;
2296 /* Special case: encode reg+reg instead of reg*2. */
2297 if (!base && index && scale && scale == 2)
2298 base = index, scale = 1;
2300 /* Special case: scaling cannot be encoded without base or displacement. */
2301 if (!base && !disp && index && scale != 1)
2302 disp = const0_rtx;
2304 out->base = base;
2305 out->index = index;
2306 out->disp = disp;
2307 out->scale = scale;
2309 return TRUE;
2312 /* Return cost of the memory address x.
2313 For i386, it is better to use a complex address than let gcc copy
2314 the address into a reg and make a new pseudo. But not if the address
2315 requires two regs - that would mean more pseudos with longer
2316 lifetimes. */
2318 ix86_address_cost (x)
2319 rtx x;
2321 struct ix86_address parts;
2322 int cost = 1;
2324 if (!ix86_decompose_address (x, &parts))
2325 abort ();
2327 /* More complex memory references are better. */
2328 if (parts.disp && parts.disp != const0_rtx)
2329 cost--;
2331 /* Attempt to minimize number of registers in the address. */
2332 if ((parts.base
2333 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2334 || (parts.index
2335 && (!REG_P (parts.index)
2336 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2337 cost++;
2339 if (parts.base
2340 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2341 && parts.index
2342 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2343 && parts.base != parts.index)
2344 cost++;
2346 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
2347 since its predecode logic can't detect the length of instructions,
2348 and decoding degenerates to vector decoded. Increase the cost of such
2349 addresses here. The penalty is at least 2 cycles. It may be worthwhile
2350 to split such addresses, or even to refuse them entirely.
2352 The following addressing modes are affected:
2353 [base+scale*index]
2354 [scale*index+disp]
2355 [base+index]
2357 The first and last cases may be avoidable by explicitly coding the zero in
2358 the memory address, but I don't have an AMD-K6 machine handy to check this
2359 theory. */
2361 if (TARGET_K6
2362 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2363 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2364 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2365 cost += 10;
2367 return cost;
2370 /* If X is a machine specific address (i.e. a symbol or label being
2371 referenced as a displacement from the GOT implemented using an
2372 UNSPEC), then return the base term. Otherwise return X. */
2375 ix86_find_base_term (x)
2376 rtx x;
2378 rtx term;
2380 if (GET_CODE (x) != PLUS
2381 || XEXP (x, 0) != pic_offset_table_rtx
2382 || GET_CODE (XEXP (x, 1)) != CONST)
2383 return x;
2385 term = XEXP (XEXP (x, 1), 0);
2387 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
2388 term = XEXP (term, 0);
2390 if (GET_CODE (term) != UNSPEC
2391 || XVECLEN (term, 0) != 1
2392 || XINT (term, 1) != 7)
2393 return x;
2395 term = XVECEXP (term, 0, 0);
2397 if (GET_CODE (term) != SYMBOL_REF
2398 && GET_CODE (term) != LABEL_REF)
2399 return x;
2401 return term;
2404 /* Determine if a given CONST RTX is a valid memory displacement
2405 in PIC mode. */
2408 legitimate_pic_address_disp_p (disp)
2409 register rtx disp;
2411 if (GET_CODE (disp) != CONST)
2412 return 0;
2413 disp = XEXP (disp, 0);
2415 if (GET_CODE (disp) == PLUS)
2417 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2418 return 0;
2419 disp = XEXP (disp, 0);
2422 if (GET_CODE (disp) != UNSPEC
2423 || XVECLEN (disp, 0) != 1)
2424 return 0;
2426 /* Must be @GOT or @GOTOFF. */
2427 if (XINT (disp, 1) != 6
2428 && XINT (disp, 1) != 7)
2429 return 0;
2431 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2432 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2433 return 0;
2435 return 1;
2438 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2439 memory address for an instruction. The MODE argument is the machine mode
2440 for the MEM expression that wants to use this address.
2442 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
2443 convert common non-canonical forms to canonical form so that they will
2444 be recognized. */
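/* For example, (plus (reg B) (mult (reg A) (const_int 4))) is a
   non-canonical form; LEGITIMIZE_ADDRESS puts the MULT first, giving
   (plus (mult (reg A) (const_int 4)) (reg B)), which this predicate
   then accepts.  (Illustrative RTL with hypothetical registers.)  */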
2447 legitimate_address_p (mode, addr, strict)
2448 enum machine_mode mode;
2449 register rtx addr;
2450 int strict;
2452 struct ix86_address parts;
2453 rtx base, index, disp;
2454 HOST_WIDE_INT scale;
2455 const char *reason = NULL;
2456 rtx reason_rtx = NULL_RTX;
2458 if (TARGET_DEBUG_ADDR)
2460 fprintf (stderr,
2461 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
2462 GET_MODE_NAME (mode), strict);
2463 debug_rtx (addr);
2466 if (! ix86_decompose_address (addr, &parts))
2468 reason = "decomposition failed";
2469 goto report_error;
2472 base = parts.base;
2473 index = parts.index;
2474 disp = parts.disp;
2475 scale = parts.scale;
2477 /* Validate base register.
2479 Don't allow SUBREGs here; they can lead to spill failures when the base
2480 is one word out of a two-word structure, which is represented internally
2481 as a DImode int. */
2483 if (base)
2485 reason_rtx = base;
2487 if (GET_CODE (base) != REG)
2489 reason = "base is not a register";
2490 goto report_error;
2493 if (GET_MODE (base) != Pmode)
2495 reason = "base is not in Pmode";
2496 goto report_error;
2499 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
2500 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
2502 reason = "base is not valid";
2503 goto report_error;
2507 /* Validate index register.
2509 Don't allow SUBREGs here; they can lead to spill failures when the index
2510 is one word out of a two-word structure, which is represented internally
2511 as a DImode int. */
2513 if (index)
2515 reason_rtx = index;
2517 if (GET_CODE (index) != REG)
2519 reason = "index is not a register";
2520 goto report_error;
2523 if (GET_MODE (index) != Pmode)
2525 reason = "index is not in Pmode";
2526 goto report_error;
2529 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
2530 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
2532 reason = "index is not valid";
2533 goto report_error;
2537 /* Validate scale factor. */
2538 if (scale != 1)
2540 reason_rtx = GEN_INT (scale);
2541 if (!index)
2543 reason = "scale without index";
2544 goto report_error;
2547 if (scale != 2 && scale != 4 && scale != 8)
2549 reason = "scale is not a valid multiplier";
2550 goto report_error;
2554 /* Validate displacement. */
2555 if (disp)
2557 reason_rtx = disp;
2559 if (!CONSTANT_ADDRESS_P (disp))
2561 reason = "displacement is not constant";
2562 goto report_error;
2565 if (GET_CODE (disp) == CONST_DOUBLE)
2567 reason = "displacement is a const_double";
2568 goto report_error;
2571 if (flag_pic && SYMBOLIC_CONST (disp))
2573 if (! legitimate_pic_address_disp_p (disp))
2575 reason = "displacement is an invalid pic construct";
2576 goto report_error;
2579 /* This code used to verify that a symbolic pic displacement
2580 includes the pic_offset_table_rtx register.
2582 While this is a good idea, unfortunately these constructs may
2583 be created by the "adds using lea" optimization for incorrect
2584 code like:
2586 int a;
2587 int foo(int i)
2589 return *(&a+i);
2592 This code is nonsensical, but results in addressing the
2593 GOT table with a pic_offset_table_rtx base. We can't
2594 easily refuse it, since it gets matched by the "addsi3"
2595 pattern, which later gets split to lea when the output
2596 register differs from the input. While this could be
2597 handled by a separate addsi pattern for this case that
2598 never results in lea, disabling this test seems to be
2599 the easier and correct fix for the crash.
2601 else if (HALF_PIC_P ())
2603 if (! HALF_PIC_ADDRESS_P (disp)
2604 || (base != NULL_RTX || index != NULL_RTX))
2606 reason = "displacement is an invalid half-pic reference";
2607 goto report_error;
2612 /* Everything looks valid. */
2613 if (TARGET_DEBUG_ADDR)
2614 fprintf (stderr, "Success.\n");
2615 return TRUE;
2617 report_error:
2618 if (TARGET_DEBUG_ADDR)
2620 fprintf (stderr, "Error: %s\n", reason);
2621 debug_rtx (reason_rtx);
2623 return FALSE;
2626 /* Return a unique alias set for the GOT. */
2628 static HOST_WIDE_INT
2629 ix86_GOT_alias_set ()
2631 static HOST_WIDE_INT set = -1;
2632 if (set == -1)
2633 set = new_alias_set ();
2634 return set;
2637 /* Return a legitimate reference for ORIG (an address) using the
2638 register REG. If REG is 0, a new pseudo is generated.
2640 There are two types of references that must be handled:
2642 1. Global data references must load the address from the GOT, via
2643 the PIC reg. An insn is emitted to do this load, and the reg is
2644 returned.
2646 2. Static data references, constant pool addresses, and code labels
2647 compute the address as an offset from the GOT, whose base is in
2648 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2649 differentiate them from global data objects. The returned
2650 address is the PIC reg + an unspec constant.
2652 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
2653 reg also appears in the address. */
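/* A sketch of both cases, for a hypothetical SYMBOL_REF SYM:

     static data (case 2):
       SYM -> (plus pic_reg (const (unspec [SYM] 7)))        ; SYM@GOTOFF
     global data (case 1):
       SYM -> (mem (plus pic_reg (const (unspec [SYM] 6))))  ; SYM@GOT

   The unspec numbers 6 and 7 match the @GOT/@GOTOFF codes handled
   by output_pic_addr_const.  */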
2656 legitimize_pic_address (orig, reg)
2657 rtx orig;
2658 rtx reg;
2660 rtx addr = orig;
2661 rtx new = orig;
2662 rtx base;
2664 if (GET_CODE (addr) == LABEL_REF
2665 || (GET_CODE (addr) == SYMBOL_REF
2666 && (CONSTANT_POOL_ADDRESS_P (addr)
2667 || SYMBOL_REF_FLAG (addr))))
2669 /* This symbol may be referenced via a displacement from the PIC
2670 base address (@GOTOFF). */
2672 current_function_uses_pic_offset_table = 1;
2673 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
2674 new = gen_rtx_CONST (Pmode, new);
2675 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2677 if (reg != 0)
2679 emit_move_insn (reg, new);
2680 new = reg;
2683 else if (GET_CODE (addr) == SYMBOL_REF)
2685 /* This symbol must be referenced via a load from the
2686 Global Offset Table (@GOT). */
2688 current_function_uses_pic_offset_table = 1;
2689 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
2690 new = gen_rtx_CONST (Pmode, new);
2691 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2692 new = gen_rtx_MEM (Pmode, new);
2693 RTX_UNCHANGING_P (new) = 1;
2694 MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();
2696 if (reg == 0)
2697 reg = gen_reg_rtx (Pmode);
2698 emit_move_insn (reg, new);
2699 new = reg;
2701 else
2703 if (GET_CODE (addr) == CONST)
2705 addr = XEXP (addr, 0);
2706 if (GET_CODE (addr) == UNSPEC)
2708 /* Check that the unspec is one of the ones we generate? */
2710 else if (GET_CODE (addr) != PLUS)
2711 abort ();
2713 if (GET_CODE (addr) == PLUS)
2715 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
2717 /* Check first to see if this is a constant offset from a @GOTOFF
2718 symbol reference. */
2719 if ((GET_CODE (op0) == LABEL_REF
2720 || (GET_CODE (op0) == SYMBOL_REF
2721 && (CONSTANT_POOL_ADDRESS_P (op0)
2722 || SYMBOL_REF_FLAG (op0))))
2723 && GET_CODE (op1) == CONST_INT)
2725 current_function_uses_pic_offset_table = 1;
2726 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
2727 new = gen_rtx_PLUS (Pmode, new, op1);
2728 new = gen_rtx_CONST (Pmode, new);
2729 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2731 if (reg != 0)
2733 emit_move_insn (reg, new);
2734 new = reg;
2737 else
2739 base = legitimize_pic_address (XEXP (addr, 0), reg);
2740 new = legitimize_pic_address (XEXP (addr, 1),
2741 base == reg ? NULL_RTX : reg);
2743 if (GET_CODE (new) == CONST_INT)
2744 new = plus_constant (base, INTVAL (new));
2745 else
2747 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
2749 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
2750 new = XEXP (new, 1);
2752 new = gen_rtx_PLUS (Pmode, base, new);
2757 return new;
2760 /* Try machine-dependent ways of modifying an illegitimate address
2761 to be legitimate. If we find one, return the new, valid address.
2762 This macro is used in only one place: `memory_address' in explow.c.
2764 OLDX is the address as it was before break_out_memory_refs was called.
2765 In some cases it is useful to look at this to decide what needs to be done.
2767 MODE and WIN are passed so that this macro can use
2768 GO_IF_LEGITIMATE_ADDRESS.
2770 It is always safe for this macro to do nothing. It exists to recognize
2771 opportunities to optimize the output.
2773 For the 80386, we handle X+REG by loading X into a register R and
2774 using R+REG. R will go in a general reg and indexing will be used.
2775 However, if REG is a broken-out memory address or multiplication,
2776 nothing needs to be done because REG can certainly go in a general reg.
2778 When -fpic is used, special handling is needed for symbolic references.
2779 See comments by legitimize_pic_address in i386.c for details. */
2782 legitimize_address (x, oldx, mode)
2783 register rtx x;
2784 register rtx oldx ATTRIBUTE_UNUSED;
2785 enum machine_mode mode;
2787 int changed = 0;
2788 unsigned log;
2790 if (TARGET_DEBUG_ADDR)
2792 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2793 GET_MODE_NAME (mode));
2794 debug_rtx (x);
2797 if (flag_pic && SYMBOLIC_CONST (x))
2798 return legitimize_pic_address (x, 0);
2800 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
2801 if (GET_CODE (x) == ASHIFT
2802 && GET_CODE (XEXP (x, 1)) == CONST_INT
2803 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
2805 changed = 1;
2806 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
2807 GEN_INT (1 << log));
2810 if (GET_CODE (x) == PLUS)
2812 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2814 if (GET_CODE (XEXP (x, 0)) == ASHIFT
2815 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2816 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
2818 changed = 1;
2819 XEXP (x, 0) = gen_rtx_MULT (Pmode,
2820 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
2821 GEN_INT (1 << log));
2824 if (GET_CODE (XEXP (x, 1)) == ASHIFT
2825 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
2826 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
2828 changed = 1;
2829 XEXP (x, 1) = gen_rtx_MULT (Pmode,
2830 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
2831 GEN_INT (1 << log));
2834 /* Put multiply first if it isn't already. */
2835 if (GET_CODE (XEXP (x, 1)) == MULT)
2837 rtx tmp = XEXP (x, 0);
2838 XEXP (x, 0) = XEXP (x, 1);
2839 XEXP (x, 1) = tmp;
2840 changed = 1;
2843 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2844 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2845 created by virtual register instantiation, register elimination, and
2846 similar optimizations. */
2847 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
2849 changed = 1;
2850 x = gen_rtx_PLUS (Pmode,
2851 gen_rtx_PLUS (Pmode, XEXP (x, 0),
2852 XEXP (XEXP (x, 1), 0)),
2853 XEXP (XEXP (x, 1), 1));
2856 /* Canonicalize
2857 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
2858 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2859 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
2860 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
2861 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
2862 && CONSTANT_P (XEXP (x, 1)))
2864 rtx constant;
2865 rtx other = NULL_RTX;
2867 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2869 constant = XEXP (x, 1);
2870 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
2872 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
2874 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
2875 other = XEXP (x, 1);
2877 else
2878 constant = 0;
2880 if (constant)
2882 changed = 1;
2883 x = gen_rtx_PLUS (Pmode,
2884 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
2885 XEXP (XEXP (XEXP (x, 0), 1), 0)),
2886 plus_constant (other, INTVAL (constant)));
2890 if (changed && legitimate_address_p (mode, x, FALSE))
2891 return x;
2893 if (GET_CODE (XEXP (x, 0)) == MULT)
2895 changed = 1;
2896 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
2899 if (GET_CODE (XEXP (x, 1)) == MULT)
2901 changed = 1;
2902 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
2905 if (changed
2906 && GET_CODE (XEXP (x, 1)) == REG
2907 && GET_CODE (XEXP (x, 0)) == REG)
2908 return x;
2910 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
2912 changed = 1;
2913 x = legitimize_pic_address (x, 0);
2916 if (changed && legitimate_address_p (mode, x, FALSE))
2917 return x;
2919 if (GET_CODE (XEXP (x, 0)) == REG)
2921 register rtx temp = gen_reg_rtx (Pmode);
2922 register rtx val = force_operand (XEXP (x, 1), temp);
2923 if (val != temp)
2924 emit_move_insn (temp, val);
2926 XEXP (x, 1) = temp;
2927 return x;
2930 else if (GET_CODE (XEXP (x, 1)) == REG)
2932 register rtx temp = gen_reg_rtx (Pmode);
2933 register rtx val = force_operand (XEXP (x, 0), temp);
2934 if (val != temp)
2935 emit_move_insn (temp, val);
2937 XEXP (x, 0) = temp;
2938 return x;
2942 return x;
2945 /* Print an integer constant expression in assembler syntax. Addition
2946 and subtraction are the only arithmetic that may appear in these
2947 expressions. FILE is the stdio stream to write to, X is the rtx, and
2948 CODE is the operand print code from the output string. */
2950 static void
2951 output_pic_addr_const (file, x, code)
2952 FILE *file;
2953 rtx x;
2954 int code;
2956 char buf[256];
2958 switch (GET_CODE (x))
2960 case PC:
2961 if (flag_pic)
2962 putc ('.', file);
2963 else
2964 abort ();
2965 break;
2967 case SYMBOL_REF:
2968 assemble_name (file, XSTR (x, 0));
2969 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
2970 fputs ("@PLT", file);
2971 break;
2973 case LABEL_REF:
2974 x = XEXP (x, 0);
2975 /* FALLTHRU */
2976 case CODE_LABEL:
2977 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
2978 assemble_name (asm_out_file, buf);
2979 break;
2981 case CONST_INT:
2982 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2983 break;
2985 case CONST:
2986 /* This used to output parentheses around the expression,
2987 but that does not work on the 386 (either ATT or BSD assembler). */
2988 output_pic_addr_const (file, XEXP (x, 0), code);
2989 break;
2991 case CONST_DOUBLE:
2992 if (GET_MODE (x) == VOIDmode)
2994 /* We can use %d if the number is <32 bits and positive. */
2995 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
2996 fprintf (file, "0x%lx%08lx",
2997 (unsigned long) CONST_DOUBLE_HIGH (x),
2998 (unsigned long) CONST_DOUBLE_LOW (x));
2999 else
3000 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
3002 else
3003 /* We can't handle floating point constants;
3004 PRINT_OPERAND must handle them. */
3005 output_operand_lossage ("floating constant misused");
3006 break;
3008 case PLUS:
3009 /* Some assemblers need integer constants to appear first. */
3010 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
3012 output_pic_addr_const (file, XEXP (x, 0), code);
3013 putc ('+', file);
3014 output_pic_addr_const (file, XEXP (x, 1), code);
3016 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3018 output_pic_addr_const (file, XEXP (x, 1), code);
3019 putc ('+', file);
3020 output_pic_addr_const (file, XEXP (x, 0), code);
3022 else
3023 abort ();
3024 break;
3026 case MINUS:
3027 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
3028 output_pic_addr_const (file, XEXP (x, 0), code);
3029 putc ('-', file);
3030 output_pic_addr_const (file, XEXP (x, 1), code);
3031 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
3032 break;
3034 case UNSPEC:
3035 if (XVECLEN (x, 0) != 1)
3036 abort ();
3037 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
3038 switch (XINT (x, 1))
3040 case 6:
3041 fputs ("@GOT", file);
3042 break;
3043 case 7:
3044 fputs ("@GOTOFF", file);
3045 break;
3046 case 8:
3047 fputs ("@PLT", file);
3048 break;
3049 default:
3050 output_operand_lossage ("invalid UNSPEC as operand");
3051 break;
3053 break;
3055 default:
3056 output_operand_lossage ("invalid expression as operand");
3060 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
3061 We need to handle our special PIC relocations. */
3063 void
3064 i386_dwarf_output_addr_const (file, x)
3065 FILE *file;
3066 rtx x;
3068 fprintf (file, "%s", INT_ASM_OP);
3069 if (flag_pic)
3070 output_pic_addr_const (file, x, '\0');
3071 else
3072 output_addr_const (file, x);
3073 fputc ('\n', file);
3076 /* In the name of slightly smaller debug output, and to cater to
3077 general assembler lossage, recognize PIC+GOTOFF and turn it back
3078 into a direct symbol reference. */
3081 i386_simplify_dwarf_addr (orig_x)
3082 rtx orig_x;
3084 rtx x = orig_x;
3086 if (GET_CODE (x) != PLUS
3087 || GET_CODE (XEXP (x, 0)) != REG
3088 || GET_CODE (XEXP (x, 1)) != CONST)
3089 return orig_x;
3091 x = XEXP (XEXP (x, 1), 0);
3092 if (GET_CODE (x) == UNSPEC
3093 && (XINT (x, 1) == 6
3094 || XINT (x, 1) == 7))
3095 return XVECEXP (x, 0, 0);
3097 if (GET_CODE (x) == PLUS
3098 && GET_CODE (XEXP (x, 0)) == UNSPEC
3099 && GET_CODE (XEXP (x, 1)) == CONST_INT
3100 && (XINT (XEXP (x, 0), 1) == 6
3101 || XINT (XEXP (x, 0), 1) == 7))
3102 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
3104 return orig_x;
3107 static void
3108 put_condition_code (code, mode, reverse, fp, file)
3109 enum rtx_code code;
3110 enum machine_mode mode;
3111 int reverse, fp;
3112 FILE *file;
3114 const char *suffix;
3116 if (mode == CCFPmode || mode == CCFPUmode)
3118 enum rtx_code second_code, bypass_code;
3119 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3120 if (bypass_code != NIL || second_code != NIL)
3121 abort();
3122 code = ix86_fp_compare_code_to_integer (code);
3123 mode = CCmode;
3125 if (reverse)
3126 code = reverse_condition (code);
3128 switch (code)
3130 case EQ:
3131 suffix = "e";
3132 break;
3133 case NE:
3134 suffix = "ne";
3135 break;
3136 case GT:
3137 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
3138 abort ();
3139 suffix = "g";
3140 break;
3141 case GTU:
3142 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
3143 Those same assemblers have the same but opposite lossage on cmov. */
3144 if (mode != CCmode)
3145 abort ();
3146 suffix = fp ? "nbe" : "a";
3147 break;
3148 case LT:
3149 if (mode == CCNOmode || mode == CCGOCmode)
3150 suffix = "s";
3151 else if (mode == CCmode || mode == CCGCmode)
3152 suffix = "l";
3153 else
3154 abort ();
3155 break;
3156 case LTU:
3157 if (mode != CCmode)
3158 abort ();
3159 suffix = "b";
3160 break;
3161 case GE:
3162 if (mode == CCNOmode || mode == CCGOCmode)
3163 suffix = "ns";
3164 else if (mode == CCmode || mode == CCGCmode)
3165 suffix = "ge";
3166 else
3167 abort ();
3168 break;
3169 case GEU:
3170 /* ??? As above. */
3171 if (mode != CCmode)
3172 abort ();
3173 suffix = fp ? "nb" : "ae";
3174 break;
3175 case LE:
3176 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
3177 abort ();
3178 suffix = "le";
3179 break;
3180 case LEU:
3181 if (mode != CCmode)
3182 abort ();
3183 suffix = "be";
3184 break;
3185 case UNORDERED:
3186 suffix = fp ? "u" : "p";
3187 break;
3188 case ORDERED:
3189 suffix = fp ? "nu" : "np";
3190 break;
3191 default:
3192 abort ();
3194 fputs (suffix, file);
3197 void
3198 print_reg (x, code, file)
3199 rtx x;
3200 int code;
3201 FILE *file;
3203 if (REGNO (x) == ARG_POINTER_REGNUM
3204 || REGNO (x) == FRAME_POINTER_REGNUM
3205 || REGNO (x) == FLAGS_REG
3206 || REGNO (x) == FPSR_REG)
3207 abort ();
3209 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3210 putc ('%', file);
3212 if (code == 'w')
3213 code = 2;
3214 else if (code == 'b')
3215 code = 1;
3216 else if (code == 'k')
3217 code = 4;
3218 else if (code == 'y')
3219 code = 3;
3220 else if (code == 'h')
3221 code = 0;
3222 else if (code == 'm' || MMX_REG_P (x))
3223 code = 5;
3224 else
3225 code = GET_MODE_SIZE (GET_MODE (x));
3227 switch (code)
3229 case 5:
3230 fputs (hi_reg_name[REGNO (x)], file);
3231 break;
3232 case 3:
3233 if (STACK_TOP_P (x))
3235 fputs ("st(0)", file);
3236 break;
3238 /* FALLTHRU */
3239 case 4:
3240 case 8:
3241 case 12:
3242 if (! FP_REG_P (x))
3243 putc ('e', file);
3244 /* FALLTHRU */
3245 case 16:
3246 case 2:
3247 fputs (hi_reg_name[REGNO (x)], file);
3248 break;
3249 case 1:
3250 fputs (qi_reg_name[REGNO (x)], file);
3251 break;
3252 case 0:
3253 fputs (qi_high_reg_name[REGNO (x)], file);
3254 break;
3255 default:
3256 abort ();
3260 /* Meaning of CODE:
3261 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
3262 C -- print opcode suffix for set/cmov insn.
3263 c -- like C, but print reversed condition
3264 R -- print the prefix for register names.
3265 z -- print the opcode suffix for the size of the current operand.
3266 * -- print a star (in certain assembler syntax)
3267 A -- print an absolute memory reference.
3268 w -- print the operand as if it's a "word" (HImode) even if it isn't.
3269 s -- print a shift double count, followed by the assembler's argument
3270 delimiter.
3271 b -- print the QImode name of the register for the indicated operand.
3272 %b0 would print %al if operands[0] is reg 0.
3273 w -- likewise, print the HImode name of the register.
3274 k -- likewise, print the SImode name of the register.
3275 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
3276 y -- print "st(0)" instead of "st" as a register.
3277 m -- print "st(n)" as an mmx register. */
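/* For illustration, if operands[0] is (reg:SI 0) then in the AT&T
   dialect "%k0" prints "%eax", "%w0" prints "%ax", "%b0" prints
   "%al" and "%h0" prints "%ah".  */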
3279 void
3280 print_operand (file, x, code)
3281 FILE *file;
3282 rtx x;
3283 int code;
3285 if (code)
3287 switch (code)
3289 case '*':
3290 if (ASSEMBLER_DIALECT == 0)
3291 putc ('*', file);
3292 return;
3294 case 'A':
3295 if (ASSEMBLER_DIALECT == 0)
3296 putc ('*', file);
3297 else if (ASSEMBLER_DIALECT == 1)
3299 /* Intel syntax. For absolute addresses, registers should not
3300 be surrounded by braces. */
3301 if (GET_CODE (x) != REG)
3303 putc ('[', file);
3304 PRINT_OPERAND (file, x, 0);
3305 putc (']', file);
3306 return;
3310 PRINT_OPERAND (file, x, 0);
3311 return;
3314 case 'L':
3315 if (ASSEMBLER_DIALECT == 0)
3316 putc ('l', file);
3317 return;
3319 case 'W':
3320 if (ASSEMBLER_DIALECT == 0)
3321 putc ('w', file);
3322 return;
3324 case 'B':
3325 if (ASSEMBLER_DIALECT == 0)
3326 putc ('b', file);
3327 return;
3329 case 'Q':
3330 if (ASSEMBLER_DIALECT == 0)
3331 putc ('l', file);
3332 return;
3334 case 'S':
3335 if (ASSEMBLER_DIALECT == 0)
3336 putc ('s', file);
3337 return;
3339 case 'T':
3340 if (ASSEMBLER_DIALECT == 0)
3341 putc ('t', file);
3342 return;
3344 case 'z':
3345 /* 387 opcodes don't get size suffixes if the operands are
3346 registers. */
3348 if (STACK_REG_P (x))
3349 return;
3351 /* Derive the opcode suffix from the size of the operand. */
3352 switch (GET_MODE_SIZE (GET_MODE (x)))
3354 case 2:
3355 #ifdef HAVE_GAS_FILDS_FISTS
3356 putc ('s', file);
3357 #endif
3358 return;
3360 case 4:
3361 if (GET_MODE (x) == SFmode)
3363 putc ('s', file);
3364 return;
3366 else
3367 putc ('l', file);
3368 return;
3370 case 12:
3371 case 16:
3372 putc ('t', file);
3373 return;
3375 case 8:
3376 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
3378 #ifdef GAS_MNEMONICS
3379 putc ('q', file);
3380 #else
3381 putc ('l', file);
3382 putc ('l', file);
3383 #endif
3385 else
3386 putc ('l', file);
3387 return;
3389 default:
3390 abort ();
3393 case 'b':
3394 case 'w':
3395 case 'k':
3396 case 'h':
3397 case 'y':
3398 case 'm':
3399 case 'X':
3400 case 'P':
3401 break;
3403 case 's':
3404 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
3406 PRINT_OPERAND (file, x, 0);
3407 putc (',', file);
3409 return;
3411 case 'C':
3412 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
3413 return;
3414 case 'F':
3415 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
3416 return;
3418 /* Like above, but reverse condition */
3419 case 'c':
3420 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
3421 return;
3422 case 'f':
3423 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
3424 return;
3426 default:
3428 char str[50];
3429 sprintf (str, "invalid operand code `%c'", code);
3430 output_operand_lossage (str);
3435 if (GET_CODE (x) == REG)
3437 PRINT_REG (x, code, file);
3440 else if (GET_CODE (x) == MEM)
3442 /* No `byte ptr' prefix for call instructions. */
3443 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
3445 const char * size;
3446 switch (GET_MODE_SIZE (GET_MODE (x)))
3448 case 1: size = "BYTE"; break;
3449 case 2: size = "WORD"; break;
3450 case 4: size = "DWORD"; break;
3451 case 8: size = "QWORD"; break;
3452 case 12: size = "XWORD"; break;
3453 case 16: size = "XMMWORD"; break;
3454 default:
3455 abort ();
3458 /* Check for explicit size override (codes 'b', 'w' and 'k') */
3459 if (code == 'b')
3460 size = "BYTE";
3461 else if (code == 'w')
3462 size = "WORD";
3463 else if (code == 'k')
3464 size = "DWORD";
3466 fputs (size, file);
3467 fputs (" PTR ", file);
3470 x = XEXP (x, 0);
3471 if (flag_pic && CONSTANT_ADDRESS_P (x))
3472 output_pic_addr_const (file, x, code);
3473 else
3474 output_address (x);
3477 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
3479 REAL_VALUE_TYPE r;
3480 long l;
3482 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3483 REAL_VALUE_TO_TARGET_SINGLE (r, l);
3485 if (ASSEMBLER_DIALECT == 0)
3486 putc ('$', file);
3487 fprintf (file, "0x%lx", l);
3490 /* These float cases don't actually occur as immediate operands. */
3491 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
3493 REAL_VALUE_TYPE r;
3494 char dstr[30];
3496 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3497 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3498 fprintf (file, "%s", dstr);
3501 else if (GET_CODE (x) == CONST_DOUBLE
3502 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
3504 REAL_VALUE_TYPE r;
3505 char dstr[30];
3507 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3508 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3509 fprintf (file, "%s", dstr);
3511 else
3513 if (code != 'P')
3515 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
3517 if (ASSEMBLER_DIALECT == 0)
3518 putc ('$', file);
3520 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
3521 || GET_CODE (x) == LABEL_REF)
3523 if (ASSEMBLER_DIALECT == 0)
3524 putc ('$', file);
3525 else
3526 fputs ("OFFSET FLAT:", file);
3529 if (GET_CODE (x) == CONST_INT)
3530 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3531 else if (flag_pic)
3532 output_pic_addr_const (file, x, code);
3533 else
3534 output_addr_const (file, x);
3538 /* Print a memory operand whose address is ADDR. */
3540 void
3541 print_operand_address (file, addr)
3542 FILE *file;
3543 register rtx addr;
3545 struct ix86_address parts;
3546 rtx base, index, disp;
3547 int scale;
3549 if (! ix86_decompose_address (addr, &parts))
3550 abort ();
3552 base = parts.base;
3553 index = parts.index;
3554 disp = parts.disp;
3555 scale = parts.scale;
3557 if (!base && !index)
3559 /* A displacement-only address requires special attention. */
3561 if (GET_CODE (disp) == CONST_INT)
3563 if (ASSEMBLER_DIALECT != 0)
3565 if (USER_LABEL_PREFIX[0] == 0)
3566 putc ('%', file);
3567 fputs ("ds:", file);
3569 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
3571 else if (flag_pic)
3572 output_pic_addr_const (file, addr, 0);
3573 else
3574 output_addr_const (file, addr);
3576 else
3578 if (ASSEMBLER_DIALECT == 0)
3580 if (disp)
3582 if (flag_pic)
3583 output_pic_addr_const (file, disp, 0);
3584 else if (GET_CODE (disp) == LABEL_REF)
3585 output_asm_label (disp);
3586 else
3587 output_addr_const (file, disp);
3590 putc ('(', file);
3591 if (base)
3592 PRINT_REG (base, 0, file);
3593 if (index)
3595 putc (',', file);
3596 PRINT_REG (index, 0, file);
3597 if (scale != 1)
3598 fprintf (file, ",%d", scale);
3600 putc (')', file);
3602 else
3604 rtx offset = NULL_RTX;
3606 if (disp)
3608 /* Pull out the offset of a symbol; print any symbol itself. */
3609 if (GET_CODE (disp) == CONST
3610 && GET_CODE (XEXP (disp, 0)) == PLUS
3611 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3613 offset = XEXP (XEXP (disp, 0), 1);
3614 disp = gen_rtx_CONST (VOIDmode,
3615 XEXP (XEXP (disp, 0), 0));
3618 if (flag_pic)
3619 output_pic_addr_const (file, disp, 0);
3620 else if (GET_CODE (disp) == LABEL_REF)
3621 output_asm_label (disp);
3622 else if (GET_CODE (disp) == CONST_INT)
3623 offset = disp;
3624 else
3625 output_addr_const (file, disp);
3628 putc ('[', file);
3629 if (base)
3631 PRINT_REG (base, 0, file);
3632 if (offset)
3634 if (INTVAL (offset) >= 0)
3635 putc ('+', file);
3636 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3639 else if (offset)
3640 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3641 else
3642 putc ('0', file);
3644 if (index)
3646 putc ('+', file);
3647 PRINT_REG (index, 0, file);
3648 if (scale != 1)
3649 fprintf (file, "*%d", scale);
3651 putc (']', file);
3656 /* Split one or more DImode RTL references into pairs of SImode
3657 references. The RTL can be REG, offsettable MEM, integer constant, or
3658 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3659 split and "num" is its length. lo_half and hi_half are output arrays
3660 that parallel "operands". */
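/* An illustrative use (hypothetical hard registers, after reload):
   splitting (reg:DI 0) yields lo_half[0] = (reg:SI 0) and
   hi_half[0] = (reg:SI 1); a CONST_INT or CONST_DOUBLE is handled
   by split_double instead.  */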
3662 void
3663 split_di (operands, num, lo_half, hi_half)
3664 rtx operands[];
3665 int num;
3666 rtx lo_half[], hi_half[];
3668 while (num--)
3670 rtx op = operands[num];
3671 if (CONSTANT_P (op))
3672 split_double (op, &lo_half[num], &hi_half[num]);
3673 else if (! reload_completed)
3675 lo_half[num] = gen_lowpart (SImode, op);
3676 hi_half[num] = gen_highpart (SImode, op);
3678 else if (GET_CODE (op) == REG)
3680 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3681 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
3683 else if (offsettable_memref_p (op))
3685 rtx lo_addr = XEXP (op, 0);
3686 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3687 lo_half[num] = change_address (op, SImode, lo_addr);
3688 hi_half[num] = change_address (op, SImode, hi_addr);
3690 else
3691 abort ();
3695 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
3696 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3697 is the expression of the binary operation. The output may either be
3698 emitted here, or returned to the caller, like all output_* functions.
3700 There is no guarantee that the operands are the same mode, as they
3701 might be within FLOAT or FLOAT_EXTEND expressions. */
3703 #ifndef SYSV386_COMPAT
3704 /* Set to 1 for compatibility with brain-damaged assemblers. No one
3705 wants to fix the assemblers because that causes incompatibility
3706 with gcc. No one wants to fix gcc because that causes
3707 incompatibility with assemblers... You can use the option of
3708 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
3709 #define SYSV386_COMPAT 1
3710 #endif
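/* A note on the templates below: the {att|intel} braces select the
   output for each assembler dialect, so a template such as
   "p\t{%2, %0|%0, %2}" prints the same two operands in opposite
   orders for ASSEMBLER_DIALECT == 0 (AT&T) and == 1 (Intel).  */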
3712 const char *
3713 output_387_binary_op (insn, operands)
3714 rtx insn;
3715 rtx *operands;
3717 static char buf[30];
3718 const char *p;
3720 #ifdef ENABLE_CHECKING
3721 /* Even if we do not want to check the inputs, this documents the input
3722 constraints, which helps in understanding the following code. */
3723 if (STACK_REG_P (operands[0])
3724 && ((REG_P (operands[1])
3725 && REGNO (operands[0]) == REGNO (operands[1])
3726 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
3727 || (REG_P (operands[2])
3728 && REGNO (operands[0]) == REGNO (operands[2])
3729 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
3730 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
3731 ; /* ok */
3732 else
3733 abort ();
3734 #endif
3736 switch (GET_CODE (operands[3]))
3738 case PLUS:
3739 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3740 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3741 p = "fiadd";
3742 else
3743 p = "fadd";
3744 break;
3746 case MINUS:
3747 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3748 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3749 p = "fisub";
3750 else
3751 p = "fsub";
3752 break;
3754 case MULT:
3755 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3756 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3757 p = "fimul";
3758 else
3759 p = "fmul";
3760 break;
3762 case DIV:
3763 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3764 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3765 p = "fidiv";
3766 else
3767 p = "fdiv";
3768 break;
3770 default:
3771 abort ();
3774 strcpy (buf, p);
3776 switch (GET_CODE (operands[3]))
3778 case MULT:
3779 case PLUS:
3780 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
3782 rtx temp = operands[2];
3783 operands[2] = operands[1];
3784 operands[1] = temp;
3787 /* We now know operands[0] == operands[1]. */
3789 if (GET_CODE (operands[2]) == MEM)
3791 p = "%z2\t%2";
3792 break;
3795 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3797 if (STACK_TOP_P (operands[0]))
3798 /* How is it that we are storing to a dead operand[2]?
3799 Well, presumably operands[1] is dead too. We can't
3800 store the result to st(0) as st(0) gets popped on this
3801 instruction. Instead store to operands[2] (which I
3802 think has to be st(1)). st(1) will be popped later.
3803 gcc <= 2.8.1 didn't have this check and generated
3804 assembly code that the Unixware assembler rejected. */
3805 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
3806 else
3807 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3808 break;
3811 if (STACK_TOP_P (operands[0]))
3812 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
3813 else
3814 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
3815 break;
3817 case MINUS:
3818 case DIV:
3819 if (GET_CODE (operands[1]) == MEM)
3821 p = "r%z1\t%1";
3822 break;
3825 if (GET_CODE (operands[2]) == MEM)
3827 p = "%z2\t%2";
3828 break;
3831 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3833 #if SYSV386_COMPAT
3834 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
3835 derived assemblers, confusingly reverse the direction of
3836 the operation for fsub{r} and fdiv{r} when the
3837 destination register is not st(0). The Intel assembler
3838 doesn't have this brain damage. Read !SYSV386_COMPAT to
3839 figure out what the hardware really does. */
3840 if (STACK_TOP_P (operands[0]))
3841 p = "{p\t%0, %2|rp\t%2, %0}";
3842 else
3843 p = "{rp\t%2, %0|p\t%0, %2}";
3844 #else
3845 if (STACK_TOP_P (operands[0]))
3846 /* As above for fmul/fadd, we can't store to st(0). */
3847 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
3848 else
3849 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3850 #endif
3851 break;
3854 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
3856 #if SYSV386_COMPAT
3857 if (STACK_TOP_P (operands[0]))
3858 p = "{rp\t%0, %1|p\t%1, %0}";
3859 else
3860 p = "{p\t%1, %0|rp\t%0, %1}";
3861 #else
3862 if (STACK_TOP_P (operands[0]))
3863 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
3864 else
3865 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
3866 #endif
3867 break;
3870 if (STACK_TOP_P (operands[0]))
3872 if (STACK_TOP_P (operands[1]))
3873 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
3874 else
3875 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
3876 break;
3878 else if (STACK_TOP_P (operands[1]))
3880 #if SYSV386_COMPAT
3881 p = "{\t%1, %0|r\t%0, %1}";
3882 #else
3883 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
3884 #endif
3886 else
3888 #if SYSV386_COMPAT
3889 p = "{r\t%2, %0|\t%0, %2}";
3890 #else
3891 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
3892 #endif
3894 break;
3896 default:
3897 abort ();
3900 strcat (buf, p);
3901 return buf;
3904 /* Output code for INSN to convert a float to a signed int. OPERANDS
3905 are the insn operands. The output may be [HSD]Imode and the input
3906 operand may be [SDX]Fmode. */
3908 const char *
3909 output_fix_trunc (insn, operands)
3910 rtx insn;
3911 rtx *operands;
3913 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3914 int dimode_p = GET_MODE (operands[0]) == DImode;
3915 rtx xops[4];
3917 /* Jump through a hoop or two for DImode, since the hardware has no
3918 non-popping instruction. We used to do this a different way, but
3919 that was somewhat fragile and broke with post-reload splitters. */
3920 if (dimode_p && !stack_top_dies)
3921 output_asm_insn ("fld\t%y1", operands);
3923 if (! STACK_TOP_P (operands[1]))
3924 abort ();
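  /* The fnstcw/fldcw dance below: the stored control word is copied
     aside (operand 4), the byte at offset 1 is overwritten with 12
     (0x0c), which sets rounding-control bits 10 and 11 to
     round-toward-zero as C truncation requires, and the original
     control word is restored afterwards.  */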
3926 xops[0] = GEN_INT (12);
3927 xops[1] = adj_offsettable_operand (operands[2], 1);
3928 xops[1] = change_address (xops[1], QImode, NULL_RTX);
3930 xops[2] = operands[0];
3931 if (GET_CODE (operands[0]) != MEM)
3932 xops[2] = operands[3];
3934 output_asm_insn ("fnstcw\t%2", operands);
3935 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
3936 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
3937 output_asm_insn ("fldcw\t%2", operands);
3938 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
3940 if (stack_top_dies || dimode_p)
3941 output_asm_insn ("fistp%z2\t%2", xops);
3942 else
3943 output_asm_insn ("fist%z2\t%2", xops);
3945 output_asm_insn ("fldcw\t%2", operands);
3947 if (GET_CODE (operands[0]) != MEM)
3949 if (dimode_p)
3951 split_di (operands+0, 1, xops+0, xops+1);
3952 split_di (operands+3, 1, xops+2, xops+3);
3953 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3954 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
3956 else if (GET_MODE (operands[0]) == SImode)
3957 output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
3958 else
3959 output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
3962 return "";
3965 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
3966 should be used and 2 when fnstsw should be used. UNORDERED_P is true
3967 when fucom should be used. */
3969 const char *
3970 output_fp_compare (insn, operands, eflags_p, unordered_p)
3971 rtx insn;
3972 rtx *operands;
3973 int eflags_p, unordered_p;
3975 int stack_top_dies;
3976 rtx cmp_op0 = operands[0];
3977 rtx cmp_op1 = operands[1];
3979 if (eflags_p == 2)
3981 cmp_op0 = cmp_op1;
3982 cmp_op1 = operands[2];
3985 if (! STACK_TOP_P (cmp_op0))
3986 abort ();
3988 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3990 if (STACK_REG_P (cmp_op1)
3991 && stack_top_dies
3992 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
3993 && REGNO (cmp_op1) != FIRST_STACK_REG)
3995 /* If the top of the 387 stack dies, and the other operand
3996 is also a stack register that dies, then this must be a
3997 `fcompp' float compare.
3999 if (eflags_p == 1)
4001 /* There is no double popping fcomi variant. Fortunately,
4002 eflags is immune from the fstp's cc clobbering. */
4003 if (unordered_p)
4004 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
4005 else
4006 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
4007 return "fstp\t%y0";
4009 else
4011 if (eflags_p == 2)
4013 if (unordered_p)
4014 return "fucompp\n\tfnstsw\t%0";
4015 else
4016 return "fcompp\n\tfnstsw\t%0";
4018 else
4020 if (unordered_p)
4021 return "fucompp";
4022 else
4023 return "fcompp";
4027 else
4029 /* Encoded here as (eflags_p << 3) | (intmode << 2) | (unordered_p << 1) | stack_top_dies. */
4031 static const char * const alt[24] =
4033 "fcom%z1\t%y1",
4034 "fcomp%z1\t%y1",
4035 "fucom%z1\t%y1",
4036 "fucomp%z1\t%y1",
4038 "ficom%z1\t%y1",
4039 "ficomp%z1\t%y1",
4040 NULL,
4041 NULL,
4043 "fcomi\t{%y1, %0|%0, %y1}",
4044 "fcomip\t{%y1, %0|%0, %y1}",
4045 "fucomi\t{%y1, %0|%0, %y1}",
4046 "fucomip\t{%y1, %0|%0, %y1}",
4048 NULL,
4049 NULL,
4050 NULL,
4051 NULL,
4053 "fcom%z2\t%y2\n\tfnstsw\t%0",
4054 "fcomp%z2\t%y2\n\tfnstsw\t%0",
4055 "fucom%z2\t%y2\n\tfnstsw\t%0",
4056 "fucomp%z2\t%y2\n\tfnstsw\t%0",
4058 "ficom%z2\t%y2\n\tfnstsw\t%0",
4059 "ficomp%z2\t%y2\n\tfnstsw\t%0",
4060 NULL,
4061 NULL
4064 int mask;
4065 const char *ret;
4067 mask = eflags_p << 3;
4068 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4069 mask |= unordered_p << 1;
4070 mask |= stack_top_dies;
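      /* A worked example: plain fcomi (eflags_p == 1) with the stack top
	 dying gives mask == (1 << 3) | 1 == 9, selecting
	 "fcomip\t{%y1, %0|%0, %y1}" from the table above.  */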
4072 if (mask >= 24)
4073 abort ();
4074 ret = alt[mask];
4075 if (ret == NULL)
4076 abort ();
4078 return ret;
4082 /* Output assembler code to FILE to initialize basic-block profiling.
4084 If profile_block_flag == 2
4086 Output code to call the subroutine `__bb_init_trace_func'
4087 and pass two parameters to it. The first parameter is
4088 the address of a block allocated in the object module.
4089 The second parameter is the number of the first basic block
4090 of the function.
4092 The name of the block is a local symbol made with this statement:
4094 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4096 Of course, since you are writing the definition of
4097 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4098 can take a short cut in the definition of this macro and use the
4099 name that you know will result.
4101 The number of the first basic block of the function is
4102 passed to the macro in BLOCK_OR_LABEL.
4104 If described in a virtual assembler language the code to be
4105 output looks like:
4107 parameter1 <- LPBX0
4108 parameter2 <- BLOCK_OR_LABEL
4109 call __bb_init_trace_func
4111 else if profile_block_flag != 0
4113 Output code to call the subroutine `__bb_init_func'
4114 and pass a single parameter to it, which is the same
4115 as the first parameter to `__bb_init_trace_func'.
4117 The first word of this parameter is a flag which will be nonzero if
4118 the object module has already been initialized. So test this word
4119 first, and do not call `__bb_init_func' if the flag is nonzero.
4120 Note: When profile_block_flag == 2 the test need not be done
4121 but `__bb_init_trace_func' *must* be called.
4123 BLOCK_OR_LABEL may be used to generate a label number as a
4124 branch destination in case `__bb_init_func' will not be called.
4126 If described in a virtual assembler language the code to be
4127 output looks like:
4129 cmp (LPBX0),0
4130 jne local_label
4131 parameter1 <- LPBX0
4132 call __bb_init_func
4133 local_label:
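/* As a concrete sketch of the profile_block_flag == 2 path below (non-PIC
   case, AT&T syntax; the label spelling is illustrative, it really comes
   from ASM_GENERATE_INTERNAL_LABEL), the emitted code is roughly:

	pushl	$BLOCK_OR_LABEL
	pushl	$LPBX0
	call	__bb_init_trace_func
	addl	$8, %esp  */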
4136 void
4137 ix86_output_function_block_profiler (file, block_or_label)
4138 FILE *file;
4139 int block_or_label;
4141 static int num_func = 0;
4142 rtx xops[8];
4143 char block_table[80], false_label[80];
4145 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4147 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4148 xops[5] = stack_pointer_rtx;
4149 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4151 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4153 switch (profile_block_flag)
4155 case 2:
4156 xops[2] = GEN_INT (block_or_label);
4157 xops[3] = gen_rtx_MEM (Pmode,
4158 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4159 xops[6] = GEN_INT (8);
4161 output_asm_insn ("push{l}\t%2", xops);
4162 if (!flag_pic)
4163 output_asm_insn ("push{l}\t%1", xops);
4164 else
4166 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4167 output_asm_insn ("push{l}\t%7", xops);
4169 output_asm_insn ("call\t%P3", xops);
4170 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4171 break;
4173 default:
4174 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
4176 xops[0] = const0_rtx;
4177 xops[2] = gen_rtx_MEM (Pmode,
4178 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4179 xops[3] = gen_rtx_MEM (Pmode,
4180 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4181 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4182 xops[6] = GEN_INT (4);
4184 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
4186 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4187 output_asm_insn ("jne\t%2", xops);
4189 if (!flag_pic)
4190 output_asm_insn ("push{l}\t%1", xops);
4191 else
4193 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4194 output_asm_insn ("push{l}\t%7", xops);
4196 output_asm_insn ("call\t%P3", xops);
4197 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4198 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4199 num_func++;
4200 break;
4204 /* Output assembler code to FILE to increment a counter associated
4205 with basic block number BLOCKNO.
4207 If profile_block_flag == 2
4209 Output code to initialize the global structure `__bb' and
4210 call the function `__bb_trace_func' which will increment the
4211 counter.
4213 `__bb' consists of two words. The number of the basic block
4214 must be stored in the first word, and the address of a block
4215 allocated in the object module must be stored in the second
4216 word.
4218 The basic block number is given by BLOCKNO.
4220 The address of the block is given by the label created with
4222 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4224 by FUNCTION_BLOCK_PROFILER.
4226 Of course, since you are writing the definition of
4227 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4228 can take a short cut in the definition of this macro and use the
4229 name that you know will result.
4231 If described in a virtual assembler language the code to be
4232 output looks like:
4234 move BLOCKNO -> (__bb)
4235 move LPBX0 -> (__bb+4)
4236 call __bb_trace_func
4238 Note that function `__bb_trace_func' must not change the
4239 machine state, especially the flag register. To guarantee
4240 this, you must output code to save and restore registers
4241 either in this macro or in the macros MACHINE_STATE_SAVE
4242 and MACHINE_STATE_RESTORE. The last two macros will be
4243 used in the function `__bb_trace_func', so you must make
4244 sure that the function prologue does not change any
4245 register prior to saving it with MACHINE_STATE_SAVE.
4247 else if profile_block_flag != 0
4249 Output code to increment the counter directly.
4250 Basic blocks are numbered separately from zero within each
4251 compiled object module. The count associated with block number
4252 BLOCKNO is at index BLOCKNO in an array of words; the name of
4253 this array is a local symbol made with this statement:
4255 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
4257 Of course, since you are writing the definition of
4258 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4259 can take a short cut in the definition of this macro and use the
4260 name that you know will result.
4262 If described in a virtual assembler language the code to be
4263 output looks like:
4265 inc (LPBX2+4*BLOCKNO)
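/* For the direct-increment case (profile_block_flag != 2) the body below
   emits a single instruction; e.g. without PIC and with BLOCKNO == 3 this
   comes out to roughly "incl LPBX2+12" (illustrative; the exact label
   spelling depends on ASM_GENERATE_INTERNAL_LABEL).  */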
4268 void
4269 ix86_output_block_profiler (file, blockno)
4270 FILE *file ATTRIBUTE_UNUSED;
4271 int blockno;
4273 rtx xops[8], cnt_rtx;
4274 char counts[80];
4275 char *block_table = counts;
4277 switch (profile_block_flag)
4279 case 2:
4280 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4282 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4283 xops[2] = GEN_INT (blockno);
4284 xops[3] = gen_rtx_MEM (Pmode,
4285 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
4286 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
4287 xops[5] = plus_constant (xops[4], 4);
4288 xops[0] = gen_rtx_MEM (SImode, xops[4]);
4289 xops[6] = gen_rtx_MEM (SImode, xops[5]);
4291 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4293 output_asm_insn ("pushf", xops);
4294 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4295 if (flag_pic)
4297 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4298 output_asm_insn ("push{l}\t%7", xops);
4299 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4300 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
4301 output_asm_insn ("pop{l}\t%7", xops);
4303 else
4304 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
4305 output_asm_insn ("call\t%P3", xops);
4306 output_asm_insn ("popf", xops);
4308 break;
4310 default:
4311 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
4312 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
4313 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
4315 if (blockno)
4316 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
4318 if (flag_pic)
4319 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
4321 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
4322 output_asm_insn ("inc{l}\t%0", xops);
4324 break;
4328 void
4329 ix86_expand_move (mode, operands)
4330 enum machine_mode mode;
4331 rtx operands[];
4333 int strict = (reload_in_progress || reload_completed);
4334 rtx insn;
4336 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
4338 /* Emit insns to move operands[1] into operands[0]. */
4340 if (GET_CODE (operands[0]) == MEM)
4341 operands[1] = force_reg (Pmode, operands[1]);
4342 else
4344 rtx temp = operands[0];
4345 if (GET_CODE (temp) != REG)
4346 temp = gen_reg_rtx (Pmode);
4347 temp = legitimize_pic_address (operands[1], temp);
4348 if (temp == operands[0])
4349 return;
4350 operands[1] = temp;
4353 else
4355 if (GET_CODE (operands[0]) == MEM
4356 && (GET_MODE (operands[0]) == QImode
4357 || !push_operand (operands[0], mode))
4358 && GET_CODE (operands[1]) == MEM)
4359 operands[1] = force_reg (mode, operands[1]);
4361 if (push_operand (operands[0], mode)
4362 && ! general_no_elim_operand (operands[1], mode))
4363 operands[1] = copy_to_mode_reg (mode, operands[1]);
4365 if (FLOAT_MODE_P (mode))
4367 /* If we are loading a floating point constant to a register,
4368 force the value to memory now, since we'll get better code
4369 out the back end. */
4371 if (strict)
4373 else if (GET_CODE (operands[1]) == CONST_DOUBLE
4374 && register_operand (operands[0], mode))
4375 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
4379 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
4381 emit_insn (insn);
4384 /* Attempt to expand a binary operator. Make the expansion closer to the
4385 actual machine than just general_operand, which would allow 3 separate
4386 memory references (one output, two input) in a single insn. */
4388 void
4389 ix86_expand_binary_operator (code, mode, operands)
4390 enum rtx_code code;
4391 enum machine_mode mode;
4392 rtx operands[];
4394 int matching_memory;
4395 rtx src1, src2, dst, op, clob;
4397 dst = operands[0];
4398 src1 = operands[1];
4399 src2 = operands[2];
4401 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
4402 if (GET_RTX_CLASS (code) == 'c'
4403 && (rtx_equal_p (dst, src2)
4404 || immediate_operand (src1, mode)))
4406 rtx temp = src1;
4407 src1 = src2;
4408 src2 = temp;
4411 /* If the destination is memory, and we do not have matching source
4412 operands, do things in registers. */
4413 matching_memory = 0;
4414 if (GET_CODE (dst) == MEM)
4416 if (rtx_equal_p (dst, src1))
4417 matching_memory = 1;
4418 else if (GET_RTX_CLASS (code) == 'c'
4419 && rtx_equal_p (dst, src2))
4420 matching_memory = 2;
4421 else
4422 dst = gen_reg_rtx (mode);
4425 /* Both source operands cannot be in memory. */
4426 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
4428 if (matching_memory != 2)
4429 src2 = force_reg (mode, src2);
4430 else
4431 src1 = force_reg (mode, src1);
4434 /* If the operation is not commutative, source 1 cannot be a constant
4435 or non-matching memory. */
4436 if ((CONSTANT_P (src1)
4437 || (!matching_memory && GET_CODE (src1) == MEM))
4438 && GET_RTX_CLASS (code) != 'c')
4439 src1 = force_reg (mode, src1);
4441 /* If optimizing, copy to regs to improve CSE */
4442 if (optimize && ! no_new_pseudos)
4444 if (GET_CODE (dst) == MEM)
4445 dst = gen_reg_rtx (mode);
4446 if (GET_CODE (src1) == MEM)
4447 src1 = force_reg (mode, src1);
4448 if (GET_CODE (src2) == MEM)
4449 src2 = force_reg (mode, src2);
4452 /* Emit the instruction. */
4454 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
4455 if (reload_in_progress)
4457 /* Reload doesn't know about the flags register, and doesn't know that
4458 it doesn't want to clobber it. We can only do this with PLUS. */
4459 if (code != PLUS)
4460 abort ();
4461 emit_insn (op);
4463 else
4465 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4466 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4469 /* Fix up the destination if needed. */
4470 if (dst != operands[0])
4471 emit_move_insn (operands[0], dst);
4474 /* Return TRUE or FALSE depending on whether the binary operator meets the
4475 appropriate constraints. */
4478 ix86_binary_operator_ok (code, mode, operands)
4479 enum rtx_code code;
4480 enum machine_mode mode ATTRIBUTE_UNUSED;
4481 rtx operands[3];
4483 /* Both source operands cannot be in memory. */
4484 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4485 return 0;
4486 /* If the operation is not commutative, source 1 cannot be a constant. */
4487 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4488 return 0;
4489 /* If the destination is memory, we must have a matching source operand. */
4490 if (GET_CODE (operands[0]) == MEM
4491 && ! (rtx_equal_p (operands[0], operands[1])
4492 || (GET_RTX_CLASS (code) == 'c'
4493 && rtx_equal_p (operands[0], operands[2]))))
4494 return 0;
4495 /* If the operation is not commutative and source 1 is memory, we must
4496 have a matching destination. */
4497 if (GET_CODE (operands[1]) == MEM
4498 && GET_RTX_CLASS (code) != 'c'
4499 && ! rtx_equal_p (operands[0], operands[1]))
4500 return 0;
4501 return 1;
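/* So, for example, ix86_binary_operator_ok accepts "mem = mem + reg" (the
   destination matches source 1), while "mem1 = mem2 + reg" or
   "reg = imm - reg" are rejected; such operands are legitimized beforehand
   by ix86_expand_binary_operator above.  (Illustrative forms only.)  */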
4504 /* Attempt to expand a unary operator. Make the expansion closer to the
4505 actual machine than just general_operand, which would allow 2 separate
4506 memory references (one output, one input) in a single insn. */
4508 void
4509 ix86_expand_unary_operator (code, mode, operands)
4510 enum rtx_code code;
4511 enum machine_mode mode;
4512 rtx operands[];
4514 int matching_memory;
4515 rtx src, dst, op, clob;
4517 dst = operands[0];
4518 src = operands[1];
4520 /* If the destination is memory, and we do not have matching source
4521 operands, do things in registers. */
4522 matching_memory = 0;
4523 if (GET_CODE (dst) == MEM)
4525 if (rtx_equal_p (dst, src))
4526 matching_memory = 1;
4527 else
4528 dst = gen_reg_rtx (mode);
4531 /* When source operand is memory, destination must match. */
4532 if (!matching_memory && GET_CODE (src) == MEM)
4533 src = force_reg (mode, src);
4535 /* If optimizing, copy to regs to improve CSE */
4536 if (optimize && ! no_new_pseudos)
4538 if (GET_CODE (dst) == MEM)
4539 dst = gen_reg_rtx (mode);
4540 if (GET_CODE (src) == MEM)
4541 src = force_reg (mode, src);
4544 /* Emit the instruction. */
4546 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
4547 if (reload_in_progress || code == NOT)
4549 /* Reload doesn't know about the flags register, and doesn't know that
4550 it doesn't want to clobber it. */
4551 if (code != NOT)
4552 abort ();
4553 emit_insn (op);
4555 else
4557 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4558 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4561 /* Fix up the destination if needed. */
4562 if (dst != operands[0])
4563 emit_move_insn (operands[0], dst);
4566 /* Return TRUE or FALSE depending on whether the unary operator meets the
4567 appropriate constraints. */
4570 ix86_unary_operator_ok (code, mode, operands)
4571 enum rtx_code code ATTRIBUTE_UNUSED;
4572 enum machine_mode mode ATTRIBUTE_UNUSED;
4573 rtx operands[2] ATTRIBUTE_UNUSED;
4575 /* If one of operands is memory, source and destination must match. */
4576 if ((GET_CODE (operands[0]) == MEM
4577 || GET_CODE (operands[1]) == MEM)
4578 && ! rtx_equal_p (operands[0], operands[1]))
4579 return FALSE;
4580 return TRUE;
4583 /* Return TRUE or FALSE depending on whether the first SET in INSN
4584 has source and destination with matching CC modes, and that the
4585 CC mode is at least as constrained as REQ_MODE. */
4588 ix86_match_ccmode (insn, req_mode)
4589 rtx insn;
4590 enum machine_mode req_mode;
4592 rtx set;
4593 enum machine_mode set_mode;
4595 set = PATTERN (insn);
4596 if (GET_CODE (set) == PARALLEL)
4597 set = XVECEXP (set, 0, 0);
4598 if (GET_CODE (set) != SET)
4599 abort ();
4600 if (GET_CODE (SET_SRC (set)) != COMPARE)
4601 abort ();
4603 set_mode = GET_MODE (SET_DEST (set));
4604 switch (set_mode)
4606 case CCNOmode:
4607 if (req_mode != CCNOmode
4608 && (req_mode != CCmode
4609 || XEXP (SET_SRC (set), 1) != const0_rtx))
4610 return 0;
4611 break;
4612 case CCmode:
4613 if (req_mode == CCGCmode)
4614 return 0;
4615 /* FALLTHRU */
4616 case CCGCmode:
4617 if (req_mode == CCGOCmode || req_mode == CCNOmode)
4618 return 0;
4619 /* FALLTHRU */
4620 case CCGOCmode:
4621 if (req_mode == CCZmode)
4622 return 0;
4623 /* FALLTHRU */
4624 case CCZmode:
4625 break;
4627 default:
4628 abort ();
4631 return (GET_MODE (SET_SRC (set)) == set_mode);
4634 /* Generate insn patterns to do an integer compare of OPERANDS. */
4636 static rtx
4637 ix86_expand_int_compare (code, op0, op1)
4638 enum rtx_code code;
4639 rtx op0, op1;
4641 enum machine_mode cmpmode;
4642 rtx tmp, flags;
4644 cmpmode = SELECT_CC_MODE (code, op0, op1);
4645 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
4647 /* This is very simple, but making the interface the same as in the
4648 FP case makes the rest of the code easier. */
4649 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
4650 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
4652 /* Return the test that should be put into the flags user, i.e.
4653 the bcc, scc, or cmov instruction. */
4654 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
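/* As an illustration, ix86_expand_int_compare for (eq:SI a b) emits

     (set (reg:CCZ 17) (compare:CCZ (reg:SI a) (reg:SI b)))

   and returns (eq (reg:CCZ 17) (const_int 0)), which the caller then wraps
   into the jump, setcc, or cmov pattern.  (A sketch; register 17 here is
   FLAGS_REG, as in the clobbers elsewhere in this file.)  */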
4657 /* Figure out whether to use ordered or unordered fp comparisons.
4658 Return the appropriate mode to use. */
4660 enum machine_mode
4661 ix86_fp_compare_mode (code)
4662 enum rtx_code code ATTRIBUTE_UNUSED;
4664 /* ??? In order to make all comparisons reversible, we do all comparisons
4665 non-trapping when compiling for IEEE. Once gcc is able to distinguish
4666 all forms of trapping and non-trapping comparisons, we can make inequality
4667 comparisons trapping again, since it results in better code when using
4668 FCOM based compares. */
4669 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
4672 enum machine_mode
4673 ix86_cc_mode (code, op0, op1)
4674 enum rtx_code code;
4675 rtx op0, op1;
4677 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4678 return ix86_fp_compare_mode (code);
4679 switch (code)
4681 /* Only zero flag is needed. */
4682 case EQ: /* ZF=0 */
4683 case NE: /* ZF!=0 */
4684 return CCZmode;
4685 /* Codes needing carry flag. */
4686 case GEU: /* CF=0 */
4687 case GTU: /* CF=0 & ZF=0 */
4688 case LTU: /* CF=1 */
4689 case LEU: /* CF=1 | ZF=1 */
4690 return CCmode;
4691 /* Codes possibly doable only with sign flag when
4692 comparing against zero. */
4693 case GE: /* SF=OF or SF=0 */
4694 case LT: /* SF<>OF or SF=1 */
4695 if (op1 == const0_rtx)
4696 return CCGOCmode;
4697 else
4698 /* For the other cases the carry flag is not required. */
4699 return CCGCmode;
4700 /* Codes doable only with the sign flag when comparing
4701 against zero, but for which we lack a jump instruction,
4702 so we need to use relational tests against overflow,
4703 which thus needs to be zero. */
4704 case GT: /* ZF=0 & SF=OF */
4705 case LE: /* ZF=1 | SF<>OF */
4706 if (op1 == const0_rtx)
4707 return CCNOmode;
4708 else
4709 return CCGCmode;
4710 default:
4711 abort ();
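/* For example, ix86_cc_mode gives SImode (eq x y) the minimal CCZmode,
   gives (gtu x y) full CCmode since the carry flag is needed, and lets
   (gt x (const_int 0)) use CCNOmode, which assumes the overflow flag is
   zero.  */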
4715 /* Return true if we should use an FCOMI instruction for this fp comparison. */
4718 ix86_use_fcomi_compare (code)
4719 enum rtx_code code ATTRIBUTE_UNUSED;
4721 enum rtx_code swapped_code = swap_condition (code);
4722 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
4723 || (ix86_fp_comparison_cost (swapped_code)
4724 == ix86_fp_comparison_fcomi_cost (swapped_code)));
4727 /* Swap, force into registers, or otherwise massage the two operands
4728 to an fp comparison. The operands are updated in place; the new
4729 comparison code is returned. */
4731 static enum rtx_code
4732 ix86_prepare_fp_compare_args (code, pop0, pop1)
4733 enum rtx_code code;
4734 rtx *pop0, *pop1;
4736 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
4737 rtx op0 = *pop0, op1 = *pop1;
4738 enum machine_mode op_mode = GET_MODE (op0);
4740 /* All of the unordered compare instructions only work on registers.
4741 The same is true of the XFmode compare instructions. The same is
4742 true of the fcomi compare instructions. */
4744 if (fpcmp_mode == CCFPUmode
4745 || op_mode == XFmode
4746 || op_mode == TFmode
4747 || ix86_use_fcomi_compare (code))
4749 op0 = force_reg (op_mode, op0);
4750 op1 = force_reg (op_mode, op1);
4752 else
4754 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
4755 things around if they appear profitable, otherwise force op0
4756 into a register. */
4758 if (standard_80387_constant_p (op0) == 0
4759 || (GET_CODE (op0) == MEM
4760 && ! (standard_80387_constant_p (op1) == 0
4761 || GET_CODE (op1) == MEM)))
4763 rtx tmp;
4764 tmp = op0, op0 = op1, op1 = tmp;
4765 code = swap_condition (code);
4768 if (GET_CODE (op0) != REG)
4769 op0 = force_reg (op_mode, op0);
4771 if (CONSTANT_P (op1))
4773 if (standard_80387_constant_p (op1))
4774 op1 = force_reg (op_mode, op1);
4775 else
4776 op1 = validize_mem (force_const_mem (op_mode, op1));
4780 /* Try to rearrange the comparison to make it cheaper. */
4781 if (ix86_fp_comparison_cost (code)
4782 > ix86_fp_comparison_cost (swap_condition (code))
4783 && (GET_CODE (op0) == REG || !reload_completed))
4785 rtx tmp;
4786 tmp = op0, op0 = op1, op1 = tmp;
4787 code = swap_condition (code);
4788 if (GET_CODE (op0) != REG)
4789 op0 = force_reg (op_mode, op0);
4792 *pop0 = op0;
4793 *pop1 = op1;
4794 return code;
4797 /* Convert the comparison codes we use to represent FP comparisons into
4798 the integer codes that will result in a proper branch. Return UNKNOWN
4799 if no such code is available. */
4800 static enum rtx_code
4801 ix86_fp_compare_code_to_integer (code)
4802 enum rtx_code code;
4804 switch (code)
4806 case GT:
4807 return GTU;
4808 case GE:
4809 return GEU;
4810 case ORDERED:
4811 case UNORDERED:
4812 return code;
4814 case UNEQ:
4815 return EQ;
4817 case UNLT:
4818 return LTU;
4820 case UNLE:
4821 return LEU;
4823 case LTGT:
4824 return NE;
4826 default:
4827 return UNKNOWN;
4831 /* Split comparison code CODE into comparisons we can do using branch
4832 instructions. BYPASS_CODE is the comparison code for the branch that
4833 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
4834 is not required, its value is set to NIL.
4835 We never require more than two branches. */
4836 static void
4837 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
4838 enum rtx_code code, *bypass_code, *first_code, *second_code;
4840 *first_code = code;
4841 *bypass_code = NIL;
4842 *second_code = NIL;
4844 /* The fcomi comparison sets flags as follows:
4846 cmp ZF PF CF
4847 > 0 0 0
4848 < 0 0 1
4849 = 1 0 0
4850 un 1 1 1 */
4852 switch (code)
4854 case GT: /* GTU - CF=0 & ZF=0 */
4855 case GE: /* GEU - CF=0 */
4856 case ORDERED: /* PF=0 */
4857 case UNORDERED: /* PF=1 */
4858 case UNEQ: /* EQ - ZF=1 */
4859 case UNLT: /* LTU - CF=1 */
4860 case UNLE: /* LEU - CF=1 | ZF=1 */
4861 case LTGT: /* EQ - ZF=0 */
4862 break;
4863 case LT: /* LTU - CF=1 - fails on unordered */
4864 *first_code = UNLT;
4865 *bypass_code = UNORDERED;
4866 break;
4867 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
4868 *first_code = UNLE;
4869 *bypass_code = UNORDERED;
4870 break;
4871 case EQ: /* EQ - ZF=1 - fails on unordered */
4872 *first_code = UNEQ;
4873 *bypass_code = UNORDERED;
4874 break;
4875 case NE: /* NE - ZF=0 - fails on unordered */
4876 *first_code = LTGT;
4877 *second_code = UNORDERED;
4878 break;
4879 case UNGE: /* GEU - CF=0 - fails on unordered */
4880 *first_code = GE;
4881 *second_code = UNORDERED;
4882 break;
4883 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
4884 *first_code = GT;
4885 *second_code = UNORDERED;
4886 break;
4887 default:
4888 abort ();
4890 if (!TARGET_IEEE_FP)
4892 *second_code = NIL;
4893 *bypass_code = NIL;
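/* Example: with TARGET_IEEE_FP, ix86_fp_comparison_codes maps NE to
   first_code = LTGT plus second_code = UNORDERED -- branch when LTGT,
   and branch again when unordered -- while without IEEE both extra
   codes are cleared and the plain code is used alone.  */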
4897 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
4898 All of the following functions use the number of instructions as the cost metric.
4899 In the future this should be tweaked to compute bytes for optimize_size and
4900 to take into account the performance of various instructions on various CPUs. */
4901 static int
4902 ix86_fp_comparison_arithmetics_cost (code)
4903 enum rtx_code code;
4905 if (!TARGET_IEEE_FP)
4906 return 4;
4907 /* The cost of code output by ix86_expand_fp_compare. */
4908 switch (code)
4910 case UNLE:
4911 case UNLT:
4912 case LTGT:
4913 case GT:
4914 case GE:
4915 case UNORDERED:
4916 case ORDERED:
4917 case UNEQ:
4918 return 4;
4920 case LT:
4921 case NE:
4922 case EQ:
4923 case UNGE:
4924 return 5;
4926 case LE:
4927 case UNGT:
4928 return 6;
4930 default:
4931 abort ();
4935 /* Return cost of comparison done using fcomi operation.
4936 See ix86_fp_comparison_arithmetics_cost for the metrics. */
4937 static int
4938 ix86_fp_comparison_fcomi_cost (code)
4939 enum rtx_code code;
4941 enum rtx_code bypass_code, first_code, second_code;
4942 /* Return an arbitrarily high cost when the instruction is not supported -
4943 this prevents gcc from using it. */
4944 if (!TARGET_CMOVE)
4945 return 1024;
4946 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
4947 return (bypass_code != NIL || second_code != NIL) + 2;
4950 /* Return cost of comparison done using sahf operation.
4951 See ix86_fp_comparison_arithmetics_cost for the metrics. */
4952 static int
4953 ix86_fp_comparison_sahf_cost (code)
4954 enum rtx_code code;
4956 enum rtx_code bypass_code, first_code, second_code;
4957 /* Return an arbitrarily high cost when the instruction is not preferred -
4958 this keeps gcc from using it. */
4959 if (!TARGET_USE_SAHF && !optimize_size)
4960 return 1024;
4961 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
4962 return (bypass_code != NIL || second_code != NIL) + 3;
4965 /* Compute the cost of the comparison using the cheapest available method.
4966 See ix86_fp_comparison_arithmetics_cost for the metrics. */
4967 static int
4968 ix86_fp_comparison_cost (code)
4969 enum rtx_code code;
4971 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
4972 int min;
4974 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
4975 sahf_cost = ix86_fp_comparison_sahf_cost (code);
4977 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
4978 if (min > sahf_cost)
4979 min = sahf_cost;
4980 if (min > fcomi_cost)
4981 min = fcomi_cost;
4982 return min;
4985 /* Generate insn patterns to do a floating point compare of OPERANDS. */
4987 static rtx
4988 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
4989 enum rtx_code code;
4990 rtx op0, op1, scratch;
4991 rtx *second_test;
4992 rtx *bypass_test;
4994 enum machine_mode fpcmp_mode, intcmp_mode;
4995 rtx tmp, tmp2;
4996 int cost = ix86_fp_comparison_cost (code);
4997 enum rtx_code bypass_code, first_code, second_code;
4999 fpcmp_mode = ix86_fp_compare_mode (code);
5000 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
5002 if (second_test)
5003 *second_test = NULL_RTX;
5004 if (bypass_test)
5005 *bypass_test = NULL_RTX;
5007 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5009 /* Do fcomi/sahf based test when profitable. */
5010 if ((bypass_code == NIL || bypass_test)
5011 && (second_code == NIL || second_test)
5012 && ix86_fp_comparison_arithmetics_cost (code) > cost)
5014 if (TARGET_CMOVE)
5016 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5017 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
5018 tmp);
5019 emit_insn (tmp);
5021 else
5023 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5024 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
5025 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5026 emit_insn (gen_x86_sahf_1 (scratch));
5029 /* The FP codes work out to act like unsigned. */
5030 intcmp_mode = fpcmp_mode;
5031 code = first_code;
5032 if (bypass_code != NIL)
5033 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
5034 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5035 const0_rtx);
5036 if (second_code != NIL)
5037 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
5038 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5039 const0_rtx);
5041 else
5043 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
5044 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5045 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
5046 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5048 /* In the unordered case, we have to check C2 for NaN's, which
5049 doesn't happen to work out to anything nice combination-wise.
5050 So do some bit twiddling on the value we've got in AH to come
5051 up with an appropriate set of condition codes. */
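/* After the fnstsw above, AH holds the high byte of the FPU status word:
   C0 is bit 0 (0x01), C2 is bit 2 (0x04) and C3 is bit 6 (0x40), so e.g.
   the 0x45 masks below select C3|C2|C0 -- the bits fcom uses to encode
   <, >, = and unordered.  */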
5053 intcmp_mode = CCNOmode;
5054 switch (code)
5056 case GT:
5057 case UNGT:
5058 if (code == GT || !TARGET_IEEE_FP)
5060 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5061 code = EQ;
5063 else
5065 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5066 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5067 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
5068 intcmp_mode = CCmode;
5069 code = GEU;
5071 break;
5072 case LT:
5073 case UNLT:
5074 if (code == LT && TARGET_IEEE_FP)
5076 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5077 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
5078 intcmp_mode = CCmode;
5079 code = EQ;
5081 else
5083 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
5084 code = NE;
5086 break;
5087 case GE:
5088 case UNGE:
5089 if (code == GE || !TARGET_IEEE_FP)
5091 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
5092 code = EQ;
5094 else
5096 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5097 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5098 GEN_INT (0x01)));
5099 code = NE;
5101 break;
5102 case LE:
5103 case UNLE:
5104 if (code == LE && TARGET_IEEE_FP)
5106 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5107 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5108 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
5109 intcmp_mode = CCmode;
5110 code = LTU;
5112 else
5114 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5115 code = NE;
5117 break;
5118 case EQ:
5119 case UNEQ:
5120 if (code == EQ && TARGET_IEEE_FP)
5122 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5123 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
5124 intcmp_mode = CCmode;
5125 code = EQ;
5127 else
5129 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5130 code = NE;
5131 break;
5133 break;
5134 case NE:
5135 case LTGT:
5136 if (code == NE && TARGET_IEEE_FP)
5138 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5139 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5140 GEN_INT (0x40)));
5141 code = NE;
5143 else
5145 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5146 code = EQ;
5148 break;
5150 case UNORDERED:
5151 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5152 code = NE;
5153 break;
5154 case ORDERED:
5155 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5156 code = EQ;
5157 break;
5159 default:
5160 abort ();
5164 /* Return the test that should be put into the flags user, i.e.
5165 the bcc, scc, or cmov instruction. */
5166 return gen_rtx_fmt_ee (code, VOIDmode,
5167 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5168 const0_rtx);
5172 ix86_expand_compare (code, second_test, bypass_test)
5173 enum rtx_code code;
5174 rtx *second_test, *bypass_test;
5176 rtx op0, op1, ret;
5177 op0 = ix86_compare_op0;
5178 op1 = ix86_compare_op1;
5180 if (second_test)
5181 *second_test = NULL_RTX;
5182 if (bypass_test)
5183 *bypass_test = NULL_RTX;
5185 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5186 ret = ix86_expand_fp_compare (code, op0, op1, gen_reg_rtx (HImode),
5187 second_test, bypass_test);
5188 else
5189 ret = ix86_expand_int_compare (code, op0, op1);
5191 return ret;
5194 void
5195 ix86_expand_branch (code, label)
5196 enum rtx_code code;
5197 rtx label;
5199 rtx tmp;
5201 switch (GET_MODE (ix86_compare_op0))
5203 case QImode:
5204 case HImode:
5205 case SImode:
5206 tmp = ix86_expand_compare (code, NULL, NULL);
5207 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5208 gen_rtx_LABEL_REF (VOIDmode, label),
5209 pc_rtx);
5210 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5211 return;
5213 case SFmode:
5214 case DFmode:
5215 case XFmode:
5216 case TFmode:
5217 /* Don't expand the comparison early, so that we get better code
5218 when jump or whoever decides to reverse the comparison. */
5220 rtvec vec;
5221 int use_fcomi;
5223 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
5224 &ix86_compare_op1);
5226 tmp = gen_rtx_fmt_ee (code, VOIDmode,
5227 ix86_compare_op0, ix86_compare_op1);
5228 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5229 gen_rtx_LABEL_REF (VOIDmode, label),
5230 pc_rtx);
5231 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
5233 use_fcomi = ix86_use_fcomi_compare (code);
5234 vec = rtvec_alloc (3 + !use_fcomi);
5235 RTVEC_ELT (vec, 0) = tmp;
5236 RTVEC_ELT (vec, 1)
5237 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
5238 RTVEC_ELT (vec, 2)
5239 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
5240 if (! use_fcomi)
5241 RTVEC_ELT (vec, 3)
5242 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
5244 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
5245 return;
5248 case DImode:
5249 /* Expand DImode branch into multiple compare+branch. */
5251 rtx lo[2], hi[2], label2;
5252 enum rtx_code code1, code2, code3;
5254 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
5256 tmp = ix86_compare_op0;
5257 ix86_compare_op0 = ix86_compare_op1;
5258 ix86_compare_op1 = tmp;
5259 code = swap_condition (code);
5261 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
5262 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
5264 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
5265 avoid two branches. This costs one extra insn, so disable when
5266 optimizing for size. */
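/* A rough sketch of the resulting sequence for a register-register
   DImode a == b (lo/hi names are illustrative):

	xorl	hi(b), hi(a)
	xorl	lo(b), lo(a)
	orl	lo(a), hi(a)
	je	label  */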
5268 if ((code == EQ || code == NE)
5269 && (!optimize_size
5270 || hi[1] == const0_rtx || lo[1] == const0_rtx))
5272 rtx xor0, xor1;
5274 xor1 = hi[0];
5275 if (hi[1] != const0_rtx)
5276 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
5277 NULL_RTX, 0, OPTAB_WIDEN);
5279 xor0 = lo[0];
5280 if (lo[1] != const0_rtx)
5281 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
5282 NULL_RTX, 0, OPTAB_WIDEN);
5284 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
5285 NULL_RTX, 0, OPTAB_WIDEN);
5287 ix86_compare_op0 = tmp;
5288 ix86_compare_op1 = const0_rtx;
5289 ix86_expand_branch (code, label);
5290 return;
5293 /* Otherwise, if we are doing less-than or greater-or-equal-than,
5294 op1 is a constant and the low word is zero, then we can just
5295 examine the high word. */
5297 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
5298 switch (code)
5300 case LT: case LTU: case GE: case GEU:
5301 ix86_compare_op0 = hi[0];
5302 ix86_compare_op1 = hi[1];
5303 ix86_expand_branch (code, label);
5304 return;
5305 default:
5306 break;
5309 /* Otherwise, we need two or three jumps. */
5311 label2 = gen_label_rtx ();
5313 code1 = code;
5314 code2 = swap_condition (code);
5315 code3 = unsigned_condition (code);
5317 switch (code)
5319 case LT: case GT: case LTU: case GTU:
5320 break;
5322 case LE: code1 = LT; code2 = GT; break;
5323 case GE: code1 = GT; code2 = LT; break;
5324 case LEU: code1 = LTU; code2 = GTU; break;
5325 case GEU: code1 = GTU; code2 = LTU; break;
5327 case EQ: code1 = NIL; code2 = NE; break;
5328 case NE: code2 = NIL; break;
5330 default:
5331 abort ();
5335 * a < b =>
5336 * if (hi(a) < hi(b)) goto true;
5337 * if (hi(a) > hi(b)) goto false;
5338 * if (lo(a) < lo(b)) goto true;
5339 * false:
5342 ix86_compare_op0 = hi[0];
5343 ix86_compare_op1 = hi[1];
5345 if (code1 != NIL)
5346 ix86_expand_branch (code1, label);
5347 if (code2 != NIL)
5348 ix86_expand_branch (code2, label2);
5350 ix86_compare_op0 = lo[0];
5351 ix86_compare_op1 = lo[1];
5352 ix86_expand_branch (code3, label);
5354 if (code2 != NIL)
5355 emit_label (label2);
5356 return;
5359 default:
5360 abort ();
5364 /* Split branch based on floating point condition. */
5365 void
5366 ix86_split_fp_branch (condition, op1, op2, target1, target2, tmp)
5367 rtx condition, op1, op2, target1, target2, tmp;
5369 rtx second, bypass;
5370 rtx label = NULL_RTX;
5371 enum rtx_code code = GET_CODE (condition);
5373 if (target2 != pc_rtx)
5375 rtx tmp = target2;
5376 code = reverse_condition_maybe_unordered (code);
5377 target2 = target1;
5378 target1 = tmp;
5381 condition = ix86_expand_fp_compare (code, op1, op2,
5382 tmp, &second, &bypass);
5383 if (bypass != NULL_RTX)
5385 label = gen_label_rtx ();
5386 emit_jump_insn (gen_rtx_SET
5387 (VOIDmode, pc_rtx,
5388 gen_rtx_IF_THEN_ELSE (VOIDmode,
5389 bypass,
5390 gen_rtx_LABEL_REF (VOIDmode,
5391 label),
5392 pc_rtx)));
5394 /* The AMD Athlon, and probably other CPUs too, has a fast bypass path
5395 between the comparison and the first branch. The second branch takes
5396 longer to execute, so place the worse-predictable branch first if possible. */
5397 if (second != NULL_RTX
5398 && (GET_CODE (second) == UNORDERED || GET_CODE (second) == ORDERED))
5400 rtx tmp = condition;
5401 condition = second;
5402 second = tmp;
5404 emit_jump_insn (gen_rtx_SET
5405 (VOIDmode, pc_rtx,
5406 gen_rtx_IF_THEN_ELSE (VOIDmode,
5407 condition, target1, target2)));
5408 if (second != NULL_RTX)
5409 emit_jump_insn (gen_rtx_SET
5410 (VOIDmode, pc_rtx,
5411 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, target2)));
5412 if (label != NULL_RTX)
5413 emit_label (label);
5417 ix86_expand_setcc (code, dest)
5418 enum rtx_code code;
5419 rtx dest;
5421 rtx ret, tmp, tmpreg;
5422 rtx second_test, bypass_test;
5423 int type;
5425 if (GET_MODE (ix86_compare_op0) == DImode)
5426 return 0; /* FAIL */
5428 /* Three modes of generation:
5429 0 -- destination does not overlap compare sources:
5430 clear dest first, emit strict_low_part setcc.
5431 1 -- destination does overlap compare sources:
5432 emit subreg setcc, zero extend.
5433 2 -- destination is in QImode:
5434 emit setcc only.
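/* E.g. for mode 0 with an SImode destination the generated code is the
   classic sequence (a sketch; register choices are illustrative):

	xorl	%eax, %eax
	cmpl	%edx, %ecx
	setl	%al  */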
5437 type = 0;
5439 if (GET_MODE (dest) == QImode)
5440 type = 2;
5441 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
5442 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
5443 type = 1;
5445 if (type == 0)
5446 emit_move_insn (dest, const0_rtx);
5448 ret = ix86_expand_compare (code, &second_test, &bypass_test);
5449 PUT_MODE (ret, QImode);
5451 tmp = dest;
5452 tmpreg = dest;
5453 if (type == 0)
5455 tmp = gen_lowpart (QImode, dest);
5456 tmpreg = tmp;
5457 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
5459 else if (type == 1)
5461 if (!cse_not_expected)
5462 tmp = gen_reg_rtx (QImode);
5463 else
5464 tmp = gen_lowpart (QImode, dest);
5465 tmpreg = tmp;
5468 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
5469 if (bypass_test || second_test)
5471 rtx test = second_test;
5472 int bypass = 0;
5473 rtx tmp2 = gen_reg_rtx (QImode);
5474 if (bypass_test)
5476 if (second_test)
5477 abort ();
5478 test = bypass_test;
5479 bypass = 1;
5480 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
5482 PUT_MODE (test, QImode);
5483 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
5485 if (bypass)
5486 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
5487 else
5488 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
5491 if (type == 1)
5493 rtx clob;
5495 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
5496 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
5497 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5498 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5499 emit_insn (tmp);
5502 return 1; /* DONE */
5506 ix86_expand_int_movcc (operands)
5507 rtx operands[];
5509 enum rtx_code code = GET_CODE (operands[1]), compare_code;
5510 rtx compare_seq, compare_op;
5511 rtx second_test, bypass_test;
5513 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
5514 When the comparison is done with an immediate, we can convert it to LTU
5515 or GEU by altering the integer. */
5517 if ((code == LEU || code == GTU)
5518 && GET_CODE (ix86_compare_op1) == CONST_INT
5519 && GET_MODE (operands[0]) != HImode
5520 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
5521 && GET_CODE (operands[2]) == CONST_INT
5522 && GET_CODE (operands[3]) == CONST_INT)
5524 if (code == LEU)
5525 code = LTU;
5526 else
5527 code = GEU;
5528 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
5531 start_sequence ();
5532 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
5533 compare_seq = gen_sequence ();
5534 end_sequence ();
5536 compare_code = GET_CODE (compare_op);
5538 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
5539 HImode insns, we'd be swallowed in word prefix ops. */
5541 if (GET_MODE (operands[0]) != HImode
5542 && GET_CODE (operands[2]) == CONST_INT
5543 && GET_CODE (operands[3]) == CONST_INT)
5545 rtx out = operands[0];
5546 HOST_WIDE_INT ct = INTVAL (operands[2]);
5547 HOST_WIDE_INT cf = INTVAL (operands[3]);
5548 HOST_WIDE_INT diff;
5550 if ((compare_code == LTU || compare_code == GEU)
5551 && !second_test && !bypass_test)
5554 /* Detect overlap between destination and compare sources. */
5555 rtx tmp = out;
5557 /* To simplify the rest of the code, restrict to the GEU case. */
5558 if (compare_code == LTU)
5560 int tmp = ct;
5561 ct = cf;
5562 cf = tmp;
5563 compare_code = reverse_condition (compare_code);
5564 code = reverse_condition (code);
5566 diff = ct - cf;
5568 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
5569 || reg_overlap_mentioned_p (out, ix86_compare_op1))
5570 tmp = gen_reg_rtx (SImode);
5572 emit_insn (compare_seq);
5573 emit_insn (gen_x86_movsicc_0_m1 (tmp));
5575 if (diff == 1)
5578 * cmpl op0,op1
5579 * sbbl dest,dest
5580 * [addl dest, ct]
5582 * Size 5 - 8.
5584 if (ct)
5585 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5587 else if (cf == -1)
5590 * cmpl op0,op1
5591 * sbbl dest,dest
5592 * orl $ct, dest
5594 * Size 8.
5596 emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
5598 else if (diff == -1 && ct)
5601 * cmpl op0,op1
5602 * sbbl dest,dest
5603 * xorl $-1, dest
5604 * [addl dest, cf]
5606 * Size 8 - 11.
5608 emit_insn (gen_one_cmplsi2 (tmp, tmp));
5609 if (cf)
5610 emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
5612 else
5615 * cmpl op0,op1
5616 * sbbl dest,dest
5617 * andl cf - ct, dest
5618 * [addl dest, ct]
5620 * Size 8 - 11.
5622 emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
5623 if (ct)
5624 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5627 if (tmp != out)
5628 emit_move_insn (out, tmp);
5630 return 1; /* DONE */
5633 diff = ct - cf;
5634 if (diff < 0)
5636 HOST_WIDE_INT tmp;
5637 tmp = ct, ct = cf, cf = tmp;
5638 diff = -diff;
5639 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
5641 /* We may be reversing an unordered compare to a normal compare, which
5642 is not valid in general (we may convert a non-trapping condition
5643 into a trapping one); however, on i386 we currently emit all
5644 comparisons unordered. */
5645 compare_code = reverse_condition_maybe_unordered (compare_code);
5646 code = reverse_condition_maybe_unordered (code);
5648 else
5650 compare_code = reverse_condition (compare_code);
5651 code = reverse_condition (code);
5654 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
5655 || diff == 3 || diff == 5 || diff == 9)
5658 * xorl dest,dest
5659 * cmpl op1,op2
5660 * setcc dest
5661 * lea cf(dest*(ct-cf)),dest
5663 * Size 14.
5665 * This also catches the degenerate setcc-only case.
5668 rtx tmp;
5669 int nops;
5671 out = emit_store_flag (out, code, ix86_compare_op0,
5672 ix86_compare_op1, VOIDmode, 0, 1);
5674 nops = 0;
5675 if (diff == 1)
5676 tmp = out;
5677 else
5679 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
5680 nops++;
5681 if (diff & 1)
5683 tmp = gen_rtx_PLUS (SImode, tmp, out);
5684 nops++;
5687 if (cf != 0)
5689 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
5690 nops++;
5692 if (tmp != out)
5694 if (nops == 0)
5695 emit_move_insn (out, tmp);
5696 else if (nops == 1)
5698 rtx clob;
5700 clob = gen_rtx_REG (CCmode, FLAGS_REG);
5701 clob = gen_rtx_CLOBBER (VOIDmode, clob);
5703 tmp = gen_rtx_SET (VOIDmode, out, tmp);
5704 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5705 emit_insn (tmp);
5707 else
5708 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
5710 if (out != operands[0])
5711 emit_move_insn (operands[0], out);
5713 return 1; /* DONE */
5717 * General case: Jumpful:
5718 * xorl dest,dest cmpl op1, op2
5719 * cmpl op1, op2 movl ct, dest
5720 * setcc dest jcc 1f
5721 * decl dest movl cf, dest
5722 * andl (cf-ct),dest 1:
5723 * addl ct,dest
5725 * Size 20. Size 14.
5727 * This is reasonably steep, but branch mispredict costs are
5728 * high on modern cpus, so consider failing only if optimizing
5729 * for space.
5731 * %%% Parameterize branch_cost on the tuning architecture, then
5732 * use that. The 80386 couldn't care less about mispredicts.
5735 if (!optimize_size && !TARGET_CMOVE)
5737 if (ct == 0)
5739 ct = cf;
5740 cf = 0;
5741 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
5743 /* We may be reversing an unordered compare to a normal compare,
5744 which is not valid in general (we may convert a non-trapping
5745 condition into a trapping one); however, on i386 we currently
5746 emit all comparisons unordered. */
5747 compare_code = reverse_condition_maybe_unordered (compare_code);
5748 code = reverse_condition_maybe_unordered (code);
5750 else
5752 compare_code = reverse_condition (compare_code);
5753 code = reverse_condition (code);
5757 out = emit_store_flag (out, code, ix86_compare_op0,
5758 ix86_compare_op1, VOIDmode, 0, 1);
5760 emit_insn (gen_addsi3 (out, out, constm1_rtx));
5761 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
5762 if (ct != 0)
5763 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5764 if (out != operands[0])
5765 emit_move_insn (operands[0], out);
5767 return 1; /* DONE */
5771 if (!TARGET_CMOVE)
5773 /* Try a few more things with specific constants and a variable. */
5775 optab op;
5776 rtx var, orig_out, out, tmp;
5778 if (optimize_size)
5779 return 0; /* FAIL */
5781 /* If one of the two operands is an interesting constant, load a
5782 constant via the recursive call below and mask the variable in with a logical operation. */
5784 if (GET_CODE (operands[2]) == CONST_INT)
5786 var = operands[3];
5787 if (INTVAL (operands[2]) == 0)
5788 operands[3] = constm1_rtx, op = and_optab;
5789 else if (INTVAL (operands[2]) == -1)
5790 operands[3] = const0_rtx, op = ior_optab;
5791 else
5792 return 0; /* FAIL */
5794 else if (GET_CODE (operands[3]) == CONST_INT)
5796 var = operands[2];
5797 if (INTVAL (operands[3]) == 0)
5798 operands[2] = constm1_rtx, op = and_optab;
5799 else if (INTVAL (operands[3]) == -1)
5800 operands[2] = const0_rtx, op = ior_optab;
5801 else
5802 return 0; /* FAIL */
5804 else
5805 return 0; /* FAIL */
5807 orig_out = operands[0];
5808 tmp = gen_reg_rtx (GET_MODE (orig_out));
5809 operands[0] = tmp;
5811 /* Recurse to get the constant loaded. */
5812 if (ix86_expand_int_movcc (operands) == 0)
5813 return 0; /* FAIL */
5815 /* Mask in the interesting variable. */
5816 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
5817 OPTAB_WIDEN);
5818 if (out != orig_out)
5819 emit_move_insn (orig_out, out);
5821 return 1; /* DONE */
5825 * For comparison with above,
5827 * movl cf,dest
5828 * movl ct,tmp
5829 * cmpl op1,op2
5830 * cmovcc tmp,dest
5832 * Size 15.
5835 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
5836 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
5837 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
5838 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
5840 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
5842 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
5843 emit_move_insn (tmp, operands[3]);
5844 operands[3] = tmp;
5846 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
5848 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
5849 emit_move_insn (tmp, operands[2]);
5850 operands[2] = tmp;
5853 emit_insn (compare_seq);
5854 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5855 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5856 compare_op, operands[2],
5857 operands[3])));
5858 if (bypass_test)
5859 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5860 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5861 bypass_test,
5862 operands[3],
5863 operands[0])));
5864 if (second_test)
5865 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5866 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5867 second_test,
5868 operands[2],
5869 operands[0])));
5871 return 1; /* DONE */
5875 ix86_expand_fp_movcc (operands)
5876 rtx operands[];
5878 enum rtx_code code;
5879 rtx tmp;
5880 rtx compare_op, second_test, bypass_test;
5882 /* The floating point conditional move instructions don't directly
5883 support conditions resulting from a signed integer comparison. */
5885 code = GET_CODE (operands[1]);
5886 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
5891 if (!fcmov_comparison_operator (compare_op, VOIDmode))
5893 if (second_test != NULL || bypass_test != NULL)
5894 abort ();
5895 tmp = gen_reg_rtx (QImode);
5896 ix86_expand_setcc (code, tmp);
5897 code = NE;
5898 ix86_compare_op0 = tmp;
5899 ix86_compare_op1 = const0_rtx;
5900 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
5902 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
5904 tmp = gen_reg_rtx (GET_MODE (operands[0]));
5905 emit_move_insn (tmp, operands[3]);
5906 operands[3] = tmp;
5908 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
5910 tmp = gen_reg_rtx (GET_MODE (operands[0]));
5911 emit_move_insn (tmp, operands[2]);
5912 operands[2] = tmp;
5915 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5916 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5917 compare_op,
5918 operands[2],
5919 operands[3])));
5920 if (bypass_test)
5921 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5922 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5923 bypass_test,
5924 operands[3],
5925 operands[0])));
5926 if (second_test)
5927 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5928 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5929 second_test,
5930 operands[2],
5931 operands[0])));
5933 return 1;
5936 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
5937 works for floating point parameters and non-offsettable memories.
5938 For pushes, it returns just stack offsets; the values will be saved
5939 in the right order. At most three parts are generated. */
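/* Illustration (a sketch of the cases below): a DFmode value held in a
   register pair splits into two consecutive SImode registers, an
   XFmode/TFmode constant splits into three CONST_INT words, and a push
   yields the same stack-relative operand for every part.  */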
5941 static int
5942 ix86_split_to_parts (operand, parts, mode)
5943 rtx operand;
5944 rtx *parts;
5945 enum machine_mode mode;
5947 int size = mode == TFmode ? 3 : GET_MODE_SIZE (mode) / 4;
5949 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
5950 abort ();
5951 if (size < 2 || size > 3)
5952 abort ();
5954 /* Optimize constant pool references into immediates. This is used by fp
5955 moves, which force all constants to memory to allow combining. */
5957 if (GET_CODE (operand) == MEM
5958 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
5959 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
5960 operand = get_pool_constant (XEXP (operand, 0));
5962 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
5964 /* The only non-offsettable memories we handle are pushes. */
5965 if (! push_operand (operand, VOIDmode))
5966 abort ();
5968 PUT_MODE (operand, SImode);
5969 parts[0] = parts[1] = parts[2] = operand;
5971 else
5973 if (mode == DImode)
5974 split_di (&operand, 1, &parts[0], &parts[1]);
5975 else
5977 if (REG_P (operand))
5979 if (!reload_completed)
5980 abort ();
5981 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
5982 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
5983 if (size == 3)
5984 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
5986 else if (offsettable_memref_p (operand))
5988 PUT_MODE (operand, SImode);
5989 parts[0] = operand;
5990 parts[1] = adj_offsettable_operand (operand, 4);
5991 if (size == 3)
5992 parts[2] = adj_offsettable_operand (operand, 8);
5994 else if (GET_CODE (operand) == CONST_DOUBLE)
5996 REAL_VALUE_TYPE r;
5997 long l[4];
5999 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
6000 switch (mode)
6002 case XFmode:
6003 case TFmode:
6004 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
6005 parts[2] = GEN_INT (l[2]);
6006 break;
6007 case DFmode:
6008 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
6009 break;
6010 default:
6011 abort ();
6013 parts[1] = GEN_INT (l[1]);
6014 parts[0] = GEN_INT (l[0]);
6016 else
6017 abort ();
6021 return size;
6024 /* Emit insns to perform a move or push of DI, DF, and XF values.
6025 Return false when normal moves are needed; true when all required
6026 insns have been emitted. Operands 2-4 contain the input values
6027 in the correct order; operands 5-7 contain the output values. */
6030 ix86_split_long_move (operands1)
6031 rtx operands1[];
6033 rtx part[2][3];
6034 rtx operands[2];
6035 int size;
6036 int push = 0;
6037 int collisions = 0;
6039 /* Make our own copy to avoid clobbering the operands. */
6040 operands[0] = copy_rtx (operands1[0]);
6041 operands[1] = copy_rtx (operands1[1]);
6043 /* The only non-offsettable memory we handle is a push. */
6044 if (push_operand (operands[0], VOIDmode))
6045 push = 1;
6046 else if (GET_CODE (operands[0]) == MEM
6047 && ! offsettable_memref_p (operands[0]))
6048 abort ();
6050 size = ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
6051 ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
6053 /* When emitting a push, watch out for source operands on the stack. */
6054 if (push && GET_CODE (operands[1]) == MEM
6055 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
6057 if (size == 3)
6058 part[1][1] = part[1][2];
6059 part[1][0] = part[1][1];
6062 /* We need to do the copy in the right order in case an address register
6063 of the source overlaps the destination. */
6064 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
6066 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
6067 collisions++;
6068 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
6069 collisions++;
6070 if (size == 3
6071 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
6072 collisions++;
6074 /* A collision in the middle part can be handled by reordering. */
6075 if (collisions == 1 && size == 3
6076 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
6078 rtx tmp;
6079 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
6080 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
6083 /* If there are more collisions, we can't handle them by reordering.
6084 Do an lea to the last part and use only one colliding move. */
6085 else if (collisions > 1)
6087 collisions = 1;
6088 emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
6089 XEXP (part[1][0], 0)));
6090 part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
6091 part[1][1] = adj_offsettable_operand (part[1][0], 4);
6092 if (size == 3)
6093 part[1][2] = adj_offsettable_operand (part[1][0], 8);
6097 if (push)
6099 if (size == 3)
6101 /* We use only the first 12 bytes of the TFmode value, but for pushes
6102 we are required to adjust the stack as if we were pushing a real
6103 16-byte value. */
6104 if (GET_MODE (operands1[0]) == TFmode)
6105 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
6106 GEN_INT (-4)));
6107 emit_insn (gen_push (part[1][2]));
6109 emit_insn (gen_push (part[1][1]));
6110 emit_insn (gen_push (part[1][0]));
6111 return 1;
6114 /* Choose the correct order so as not to overwrite the source before it is copied. */
6115 if ((REG_P (part[0][0])
6116 && REG_P (part[1][1])
6117 && (REGNO (part[0][0]) == REGNO (part[1][1])
6118 || (size == 3
6119 && REGNO (part[0][0]) == REGNO (part[1][2]))))
6120 || (collisions > 0
6121 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
6123 if (size == 3)
6125 operands1[2] = part[0][2];
6126 operands1[3] = part[0][1];
6127 operands1[4] = part[0][0];
6128 operands1[5] = part[1][2];
6129 operands1[6] = part[1][1];
6130 operands1[7] = part[1][0];
6132 else
6134 operands1[2] = part[0][1];
6135 operands1[3] = part[0][0];
6136 operands1[5] = part[1][1];
6137 operands1[6] = part[1][0];
6140 else
6142 if (size == 3)
6144 operands1[2] = part[0][0];
6145 operands1[3] = part[0][1];
6146 operands1[4] = part[0][2];
6147 operands1[5] = part[1][0];
6148 operands1[6] = part[1][1];
6149 operands1[7] = part[1][2];
6151 else
6153 operands1[2] = part[0][0];
6154 operands1[3] = part[0][1];
6155 operands1[5] = part[1][0];
6156 operands1[6] = part[1][1];
6160 return 0;
6163 void
6164 ix86_split_ashldi (operands, scratch)
6165 rtx *operands, scratch;
6167 rtx low[2], high[2];
6168 int count;
6170 if (GET_CODE (operands[2]) == CONST_INT)
6172 split_di (operands, 2, low, high);
6173 count = INTVAL (operands[2]) & 63;
6175 if (count >= 32)
6177 emit_move_insn (high[0], low[1]);
6178 emit_move_insn (low[0], const0_rtx);
6180 if (count > 32)
6181 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
6183 else
6185 if (!rtx_equal_p (operands[0], operands[1]))
6186 emit_move_insn (operands[0], operands[1]);
6187 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
6188 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
6191 else
6193 if (!rtx_equal_p (operands[0], operands[1]))
6194 emit_move_insn (operands[0], operands[1]);
6196 split_di (operands, 1, low, high);
6198 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
6199 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
6201 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6203 if (! no_new_pseudos)
6204 scratch = force_reg (SImode, const0_rtx);
6205 else
6206 emit_move_insn (scratch, const0_rtx);
6208 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
6209 scratch));
6211 else
6212 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
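/* For a constant count the split done by ix86_split_ashldi is branch-free;
   e.g. a DImode shift left by 40 becomes roughly (a sketch, with lo/hi
   naming the two SImode halves):

	movl	lo, hi
	xorl	lo, lo
	sall	$8, hi  */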
6216 void
6217 ix86_split_ashrdi (operands, scratch)
6218 rtx *operands, scratch;
6220 rtx low[2], high[2];
6221 int count;
6223 if (GET_CODE (operands[2]) == CONST_INT)
6225 split_di (operands, 2, low, high);
6226 count = INTVAL (operands[2]) & 63;
6228 if (count >= 32)
6230 emit_move_insn (low[0], high[1]);
6232 if (! reload_completed)
6233 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
6234 else
6236 emit_move_insn (high[0], low[0]);
6237 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
6240 if (count > 32)
6241 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
6243 else
6245 if (!rtx_equal_p (operands[0], operands[1]))
6246 emit_move_insn (operands[0], operands[1]);
6247 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
6248 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
6251 else
6253 if (!rtx_equal_p (operands[0], operands[1]))
6254 emit_move_insn (operands[0], operands[1]);
6256 split_di (operands, 1, low, high);
6258 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
6259 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
6261 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6263 if (! no_new_pseudos)
6264 scratch = gen_reg_rtx (SImode);
6265 emit_move_insn (scratch, high[0]);
6266 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
6267 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
6268 scratch));
6270 else
6271 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
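/* The arithmetic variant differs from the sketch after ix86_split_ashldi
   only in what fills the vacated bits: copies of the sign bit instead of
   zero.  The count >= 32 case handled above, in C (assuming ">>" of a
   negative int is arithmetic, as it is on x86):  */

static void
shift_right_64_arith_big_sketch (int *lo, int *hi, int count)
{
  *lo = *hi >> (count - 32);	/* the high word supplies the result bits */
  *hi = *hi >> 31;		/* ... and is replaced by pure sign copies */
}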
6275 void
6276 ix86_split_lshrdi (operands, scratch)
6277 rtx *operands, scratch;
6279 rtx low[2], high[2];
6280 int count;
6282 if (GET_CODE (operands[2]) == CONST_INT)
6284 split_di (operands, 2, low, high);
6285 count = INTVAL (operands[2]) & 63;
6287 if (count >= 32)
6289 emit_move_insn (low[0], high[1]);
6290 emit_move_insn (high[0], const0_rtx);
6292 if (count > 32)
6293 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
6295 else
6297 if (!rtx_equal_p (operands[0], operands[1]))
6298 emit_move_insn (operands[0], operands[1]);
6299 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
6300 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
6303 else
6305 if (!rtx_equal_p (operands[0], operands[1]))
6306 emit_move_insn (operands[0], operands[1]);
6308 split_di (operands, 1, low, high);
6310 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
6311 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
6313 /* Heh. By reversing the arguments, we can reuse this pattern. */
6314 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6316 if (! no_new_pseudos)
6317 scratch = force_reg (SImode, const0_rtx);
6318 else
6319 emit_move_insn (scratch, const0_rtx);
6321 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
6322 scratch));
6324 else
6325 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
6329 /* Expand the appropriate insns for doing strlen if not just doing
6330 repnz; scasb
6332 out = result, initialized with the start address
6333 align_rtx = alignment of the address.
6334 scratch = scratch register, initialized with the start address when
6335 not aligned, otherwise undefined
6337 This is just the body. It needs the initializations mentioned above and
6338 some address computation at the end. These things are done in i386.md. */
6340 void
6341 ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
6342 rtx out, align_rtx, scratch;
6344 int align;
6345 rtx tmp;
6346 rtx align_2_label = NULL_RTX;
6347 rtx align_3_label = NULL_RTX;
6348 rtx align_4_label = gen_label_rtx ();
6349 rtx end_0_label = gen_label_rtx ();
6350 rtx mem;
6351 rtx tmpreg = gen_reg_rtx (SImode);
6353 align = 0;
6354 if (GET_CODE (align_rtx) == CONST_INT)
6355 align = INTVAL (align_rtx);
6357 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
6359 /* Is there a known alignment and is it less than 4? */
6360 if (align < 4)
6362 /* Is there a known alignment and is it not 2? */
6363 if (align != 2)
6365 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
6366 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
6368 /* Leave just the 3 lower bits. */
6369 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
6370 NULL_RTX, 0, OPTAB_WIDEN);
6372 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6373 SImode, 1, 0, align_4_label);
6374 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
6375 SImode, 1, 0, align_2_label);
6376 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
6377 SImode, 1, 0, align_3_label);
6379 else
6381 /* Since the alignment is 2, we have to check 2 or 0 bytes;
6382 check whether the pointer is already aligned to a 4-byte boundary. */
6384 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
6385 NULL_RTX, 0, OPTAB_WIDEN);
6387 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6388 SImode, 1, 0, align_4_label);
6391 mem = gen_rtx_MEM (QImode, out);
6393 /* Now compare the bytes. */
6395 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
6396 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6397 QImode, 1, 0, end_0_label);
6399 /* Increment the address. */
6400 emit_insn (gen_addsi3 (out, out, const1_rtx));
6402 /* Not needed with an alignment of 2 */
6403 if (align != 2)
6405 emit_label (align_2_label);
6407 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6408 QImode, 1, 0, end_0_label);
6410 emit_insn (gen_addsi3 (out, out, const1_rtx));
6412 emit_label (align_3_label);
6415 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6416 QImode, 1, 0, end_0_label);
6418 emit_insn (gen_addsi3 (out, out, const1_rtx));
6421 /* Generate the loop to check 4 bytes at a time. It is not a good idea to
6422 align this loop; that only makes the program larger and does not make
6423 it any faster. */
6424 emit_label (align_4_label);
6426 mem = gen_rtx_MEM (SImode, out);
6427 emit_move_insn (scratch, mem);
6428 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
6430 /* This formula yields a nonzero result iff one of the bytes is zero.
6431 This saves three branches inside the loop and many cycles. */
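/* Worked example with one zero byte (the second byte from the top):
     scratch              = 0x12003456
     scratch - 0x01010101 = 0x10FF3355   (the zero byte borrows to 0xFF)
     ~scratch             = 0xEDFFCBA9
     AND of the two       = 0x00FF0301
     AND 0x80808080       = 0x00800000   (nonzero: a zero byte exists)
   A byte can have its top bit set in both terms only if it was zero
   itself, or if a borrow from a lower zero byte ran through it; either
   way the word contains a zero byte.  */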
6433 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
6434 emit_insn (gen_one_cmplsi2 (scratch, scratch));
6435 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
6436 emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
6437 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
6438 SImode, 1, 0, align_4_label);
6440 if (TARGET_CMOVE)
6442 rtx reg = gen_reg_rtx (SImode);
6443 emit_move_insn (reg, tmpreg);
6444 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
6446 /* If zero is not in the first two bytes, move two bytes forward. */
6447 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6448 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6449 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6450 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
6451 gen_rtx_IF_THEN_ELSE (SImode, tmp,
6452 reg,
6453 tmpreg)));
6454 /* Emit the lea manually to avoid clobbering the flags. */
6455 emit_insn (gen_rtx_SET (SImode, reg,
6456 gen_rtx_PLUS (SImode, out, GEN_INT (2))));
6458 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6459 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6460 emit_insn (gen_rtx_SET (VOIDmode, out,
6461 gen_rtx_IF_THEN_ELSE (SImode, tmp,
6462 reg,
6463 out)));
6466 else
6468 rtx end_2_label = gen_label_rtx ();
6469 /* Is zero in the first two bytes? */
6471 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6472 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6473 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
6474 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6475 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
6476 pc_rtx);
6477 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
6478 JUMP_LABEL (tmp) = end_2_label;
6480 /* Not in the first two. Move two bytes forward. */
6481 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
6482 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
6484 emit_label (end_2_label);
6488 /* Avoid branch in fixing the byte. */
6489 tmpreg = gen_lowpart (QImode, tmpreg);
6490 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
6491 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
6493 emit_label (end_0_label);
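/* Taken together, the code emitted above behaves roughly like the
   following C; this is a sketch of the generated control flow only (the
   real tail locates the zero byte with the add-with-carry trick rather
   than a byte loop):  */

static const char *
strlen_body_sketch (const char *p)
{
  unsigned int w;

  /* Check up to 3 leading bytes until P is 4-byte aligned.  */
  while ((unsigned long) p & 3)
    {
      if (*p == 0)
	return p;
      p++;
    }

  /* Scan one 32-bit word per iteration using the zero-byte formula.  */
  do
    {
      w = *(const unsigned int *) p;
      p += 4;
    }
  while (((w - 0x01010101) & ~w & 0x80808080) == 0);

  /* Some byte of W is zero; step back and find it (little endian).  */
  p -= 4;
  while (*p != 0)
    p++;
  return p;
}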
6496 /* Clear stack slot assignments remembered from previous functions.
6497 This is called from INIT_EXPANDERS once before RTL is emitted for each
6498 function. */
6500 static void
6501 ix86_init_machine_status (p)
6502 struct function *p;
6504 p->machine = (struct machine_function *)
6505 xcalloc (1, sizeof (struct machine_function));
6508 /* Mark machine specific bits of P for GC. */
6509 static void
6510 ix86_mark_machine_status (p)
6511 struct function *p;
6513 struct machine_function *machine = p->machine;
6514 enum machine_mode mode;
6515 int n;
6517 if (! machine)
6518 return;
6520 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6521 mode = (enum machine_mode) ((int) mode + 1))
6522 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6523 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
6526 static void
6527 ix86_free_machine_status (p)
6528 struct function *p;
6530 free (p->machine);
6531 p->machine = NULL;
6534 /* Return a MEM corresponding to a stack slot with mode MODE.
6535 Allocate a new slot if necessary.
6537 The RTL for a function can have several slots available: N is
6538 which slot to use. */
6540 rtx
6541 assign_386_stack_local (mode, n)
6542 enum machine_mode mode;
6543 int n;
6545 if (n < 0 || n >= MAX_386_STACK_LOCALS)
6546 abort ();
6548 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
6549 ix86_stack_locals[(int) mode][n]
6550 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
6552 return ix86_stack_locals[(int) mode][n];
6555 /* Calculate the length of the memory address in the instruction
6556 encoding. Does not include the one-byte modrm, opcode, or prefix. */
6558 static int
6559 memory_address_length (addr)
6560 rtx addr;
6562 struct ix86_address parts;
6563 rtx base, index, disp;
6564 int len;
6566 if (GET_CODE (addr) == PRE_DEC
6567 || GET_CODE (addr) == POST_INC)
6568 return 0;
6570 if (! ix86_decompose_address (addr, &parts))
6571 abort ();
6573 base = parts.base;
6574 index = parts.index;
6575 disp = parts.disp;
6576 len = 0;
6578 /* Register Indirect. */
6579 if (base && !index && !disp)
6581 /* Special cases: ebp and esp need the two-byte modrm form. */
6582 if (addr == stack_pointer_rtx
6583 || addr == arg_pointer_rtx
6584 || addr == frame_pointer_rtx
6585 || addr == hard_frame_pointer_rtx)
6586 len = 1;
6589 /* Direct Addressing. */
6590 else if (disp && !base && !index)
6591 len = 4;
6593 else
6595 /* Find the length of the displacement constant. */
6596 if (disp)
6598 if (GET_CODE (disp) == CONST_INT
6599 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
6600 len = 1;
6601 else
6602 len = 4;
6605 /* An index requires the two-byte modrm form. */
6606 if (index)
6607 len += 1;
6610 return len;
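/* Examples of what the cases above compute (bytes of the address
   encoding only; the modrm, opcode and prefix bytes are not counted):

	(%eax)		-> 0	plain register indirect
	(%esp)		-> 1	esp always needs the SIB form
	8(%ebp)		-> 1	base register plus disp8
	foo		-> 4	disp32, direct addressing
	foo(,%eax,4)	-> 5	disp32 plus one SIB byte for the index  */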
6613 /* Compute the default value for the "length_immediate" attribute. When
6614 SHORTFORM is set, assume that the insn has an 8-bit immediate alternative. */
6615 int
6616 ix86_attr_length_immediate_default (insn, shortform)
6617 rtx insn;
6618 int shortform;
6620 int len = 0;
6621 int i;
6622 extract_insn_cached (insn);
6623 for (i = recog_data.n_operands - 1; i >= 0; --i)
6624 if (CONSTANT_P (recog_data.operand[i]))
6626 if (len)
6627 abort ();
6628 if (shortform
6629 && GET_CODE (recog_data.operand[i]) == CONST_INT
6630 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
6631 len = 1;
6632 else
6634 switch (get_attr_mode (insn))
6636 case MODE_QI:
6637 len += 1;
6638 break;
6639 case MODE_HI:
6640 len += 2;
6641 break;
6642 case MODE_SI:
6643 len += 4;
6644 break;
6645 default:
6646 fatal_insn ("Unknown insn mode", insn);
6650 return len;
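/* For example, "addl $8, %eax" can use the sign-extended 8-bit immediate
   form, so its immediate contributes one byte, while "addl $100000, %eax"
   needs the full 4-byte SImode immediate.  */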
6652 /* Compute the default value for the "length_address" attribute. */
6653 int
6654 ix86_attr_length_address_default (insn)
6655 rtx insn;
6657 int i;
6658 extract_insn_cached (insn);
6659 for (i = recog_data.n_operands - 1; i >= 0; --i)
6660 if (GET_CODE (recog_data.operand[i]) == MEM)
6662 return memory_address_length (XEXP (recog_data.operand[i], 0));
6663 break;
6665 return 0;
6668 /* Return the maximum number of instructions a CPU can issue. */
6670 int
6671 ix86_issue_rate ()
6673 switch (ix86_cpu)
6675 case PROCESSOR_PENTIUM:
6676 case PROCESSOR_K6:
6677 return 2;
6679 case PROCESSOR_PENTIUMPRO:
6680 return 3;
6682 default:
6683 return 1;
6687 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
6688 by DEP_INSN and nothing else that DEP_INSN sets. */
6690 static int
6691 ix86_flags_dependant (insn, dep_insn, insn_type)
6692 rtx insn, dep_insn;
6693 enum attr_type insn_type;
6695 rtx set, set2;
6697 /* Simplify the test for uninteresting insns. */
6698 if (insn_type != TYPE_SETCC
6699 && insn_type != TYPE_ICMOV
6700 && insn_type != TYPE_FCMOV
6701 && insn_type != TYPE_IBR)
6702 return 0;
6704 if ((set = single_set (dep_insn)) != 0)
6706 set = SET_DEST (set);
6707 set2 = NULL_RTX;
6709 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
6710 && XVECLEN (PATTERN (dep_insn), 0) == 2
6711 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
6712 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
6714 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6715 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
6717 else
6718 return 0;
6720 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
6721 return 0;
6723 /* This test is true if the dependent insn reads the flags but
6724 not any other potentially set register. */
6725 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
6726 return 0;
6728 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
6729 return 0;
6731 return 1;
6734 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
6735 address whose operands are set by DEP_INSN. */
6737 static int
6738 ix86_agi_dependant (insn, dep_insn, insn_type)
6739 rtx insn, dep_insn;
6740 enum attr_type insn_type;
6742 rtx addr;
6744 if (insn_type == TYPE_LEA)
6746 addr = PATTERN (insn);
6747 if (GET_CODE (addr) == SET)
6748 ; /* ADDR is already the whole SET */
6749 else if (GET_CODE (addr) == PARALLEL
6750 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
6751 addr = XVECEXP (addr, 0, 0);
6752 else
6753 abort ();
6754 addr = SET_SRC (addr);
6756 else
6758 int i;
6759 extract_insn_cached (insn);
6760 for (i = recog_data.n_operands - 1; i >= 0; --i)
6761 if (GET_CODE (recog_data.operand[i]) == MEM)
6763 addr = XEXP (recog_data.operand[i], 0);
6764 goto found;
6766 return 0;
6767 found:;
6770 return modified_in_p (addr, dep_insn);
6773 int
6774 ix86_adjust_cost (insn, link, dep_insn, cost)
6775 rtx insn, link, dep_insn;
6776 int cost;
6778 enum attr_type insn_type, dep_insn_type;
6779 enum attr_memory memory;
6780 rtx set, set2;
6781 int dep_insn_code_number;
6783 /* Anti- and output dependencies have zero cost on all CPUs. */
6784 if (REG_NOTE_KIND (link) != 0)
6785 return 0;
6787 dep_insn_code_number = recog_memoized (dep_insn);
6789 /* If we can't recognize the insns, we can't really do anything. */
6790 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
6791 return cost;
6793 insn_type = get_attr_type (insn);
6794 dep_insn_type = get_attr_type (dep_insn);
6796 /* Prologue and epilogue allocators can have a false dependency on ebp.
6797 This results in one cycle extra stall on Pentium prologue scheduling,
6798 so handle this important case manually. */
6799 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
6800 && dep_insn_type == TYPE_ALU
6801 && !reg_mentioned_p (stack_pointer_rtx, insn))
6802 return 0;
6804 switch (ix86_cpu)
6806 case PROCESSOR_PENTIUM:
6807 /* Address Generation Interlock adds a cycle of latency. */
6808 if (ix86_agi_dependant (insn, dep_insn, insn_type))
6809 cost += 1;
6811 /* ??? Compares pair with jump/setcc. */
6812 if (ix86_flags_dependant (insn, dep_insn, insn_type))
6813 cost = 0;
6815 /* Floating point stores require the value to be ready one cycle earlier. */
6816 if (insn_type == TYPE_FMOV
6817 && get_attr_memory (insn) == MEMORY_STORE
6818 && !ix86_agi_dependant (insn, dep_insn, insn_type))
6819 cost += 1;
6820 break;
6822 case PROCESSOR_PENTIUMPRO:
6823 /* Since we can't represent delayed latencies of load+operation,
6824 increase the cost here for non-imov insns. */
6825 if (dep_insn_type != TYPE_IMOV
6826 && dep_insn_type != TYPE_FMOV
6827 && ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6828 || memory == MEMORY_BOTH))
6829 cost += 1;
6831 /* INT->FP conversion is expensive. */
6832 if (get_attr_fp_int_src (dep_insn))
6833 cost += 5;
6835 /* There is one cycle extra latency between an FP op and a store. */
6836 if (insn_type == TYPE_FMOV
6837 && (set = single_set (dep_insn)) != NULL_RTX
6838 && (set2 = single_set (insn)) != NULL_RTX
6839 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
6840 && GET_CODE (SET_DEST (set2)) == MEM)
6841 cost += 1;
6842 break;
6844 case PROCESSOR_K6:
6845 /* The esp dependency is resolved before the instruction is really
6846 finished. */
6847 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
6848 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
6849 return 1;
6851 /* Since we can't represent delayed latencies of load+operation,
6852 increase the cost here for non-imov insns. */
6853 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6854 || memory == MEMORY_BOTH)
6855 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
6857 /* INT->FP conversion is expensive. */
6858 if (get_attr_fp_int_src (dep_insn))
6859 cost += 5;
6860 break;
6862 case PROCESSOR_ATHLON:
6863 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6864 || memory == MEMORY_BOTH)
6866 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
6867 cost += 2;
6868 else
6869 cost += 3;
6872 default:
6873 break;
6876 return cost;
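/* Example of the Pentium AGI adjustment above: in the sequence

	addl $4, %ebx
	movl (%ebx), %eax

   the load's address depends on the immediately preceding update of %ebx,
   so ix86_agi_dependant holds and one extra cycle of latency is charged
   to model the address generation interlock.  */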
6879 static union
6881 struct ppro_sched_data
6883 rtx decode[3];
6884 int issued_this_cycle;
6885 } ppro;
6886 } ix86_sched_data;
6888 static int
6889 ix86_safe_length (insn)
6890 rtx insn;
6892 if (recog_memoized (insn) >= 0)
6893 return get_attr_length (insn);
6894 else
6895 return 128;
6898 static int
6899 ix86_safe_length_prefix (insn)
6900 rtx insn;
6902 if (recog_memoized (insn) >= 0)
6903 return get_attr_length (insn);
6904 else
6905 return 0;
6908 static enum attr_memory
6909 ix86_safe_memory (insn)
6910 rtx insn;
6912 if (recog_memoized (insn) >= 0)
6913 return get_attr_memory (insn);
6914 else
6915 return MEMORY_UNKNOWN;
6918 static enum attr_pent_pair
6919 ix86_safe_pent_pair (insn)
6920 rtx insn;
6922 if (recog_memoized (insn) >= 0)
6923 return get_attr_pent_pair (insn);
6924 else
6925 return PENT_PAIR_NP;
6928 static enum attr_ppro_uops
6929 ix86_safe_ppro_uops (insn)
6930 rtx insn;
6932 if (recog_memoized (insn) >= 0)
6933 return get_attr_ppro_uops (insn);
6934 else
6935 return PPRO_UOPS_MANY;
6938 static void
6939 ix86_dump_ppro_packet (dump)
6940 FILE *dump;
6942 if (ix86_sched_data.ppro.decode[0])
6944 fprintf (dump, "PPRO packet: %d",
6945 INSN_UID (ix86_sched_data.ppro.decode[0]));
6946 if (ix86_sched_data.ppro.decode[1])
6947 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
6948 if (ix86_sched_data.ppro.decode[2])
6949 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
6950 fputc ('\n', dump);
6954 /* We're beginning a new block. Initialize data structures as necessary. */
6956 void
6957 ix86_sched_init (dump, sched_verbose)
6958 FILE *dump ATTRIBUTE_UNUSED;
6959 int sched_verbose ATTRIBUTE_UNUSED;
6961 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
6964 /* Shift INSN to SLOT, and shift everything else down. */
6966 static void
6967 ix86_reorder_insn (insnp, slot)
6968 rtx *insnp, *slot;
6970 if (insnp != slot)
6972 rtx insn = *insnp;
6973 do
6974 insnp[0] = insnp[1];
6975 while (++insnp != slot);
6976 *insnp = insn;
6980 /* Find an instruction with the given pairability and the minimal number of
6981 cycles lost to the fact that the CPU waits for both pipelines to finish
6982 before reading the next instructions. Also make sure that the two
6983 instructions together do not exceed 7 bytes. */
6985 static rtx *
6986 ix86_pent_find_pair (e_ready, ready, type, first)
6987 rtx *e_ready;
6988 rtx *ready;
6989 enum attr_pent_pair type;
6990 rtx first;
6992 int mincycles, cycles;
6993 enum attr_pent_pair tmp;
6994 enum attr_memory memory;
6995 rtx *insnp, *bestinsnp = NULL;
6997 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
6998 return NULL;
7000 memory = ix86_safe_memory (first);
7001 cycles = result_ready_cost (first);
7002 mincycles = INT_MAX;
7004 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
7005 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
7006 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
7008 enum attr_memory second_memory;
7009 int secondcycles, currentcycles;
7011 second_memory = ix86_safe_memory (*insnp);
7012 secondcycles = result_ready_cost (*insnp);
7013 currentcycles = abs (cycles - secondcycles);
7015 if (secondcycles >= 1 && cycles >= 1)
7017 /* Two read/modify/write instructions together takes two
7018 cycles longer. */
7019 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
7020 currentcycles += 2;
7022 /* Read modify/write instruction followed by read/modify
7023 takes one cycle longer. */
7024 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
7025 && tmp != PENT_PAIR_UV
7026 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
7027 currentcycles += 1;
7029 if (currentcycles < mincycles)
7030 bestinsnp = insnp, mincycles = currentcycles;
7033 return bestinsnp;
7036 /* Subroutines of ix86_sched_reorder. */
7038 static void
7039 ix86_sched_reorder_pentium (ready, e_ready)
7040 rtx *ready;
7041 rtx *e_ready;
7043 enum attr_pent_pair pair1, pair2;
7044 rtx *insnp;
7046 /* This wouldn't be necessary if Haifa knew that static insn ordering
7047 matters for which pipe an insn is issued to. So we have to make
7048 some minor rearrangements. */
7050 pair1 = ix86_safe_pent_pair (*e_ready);
7052 /* If the first insn is non-pairable, let it be. */
7053 if (pair1 == PENT_PAIR_NP)
7054 return;
7056 pair2 = PENT_PAIR_NP;
7057 insnp = 0;
7059 /* If the first insn is UV or PV pairable, search for a PU
7060 insn to go with. */
7061 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
7063 insnp = ix86_pent_find_pair (e_ready-1, ready,
7064 PENT_PAIR_PU, *e_ready);
7065 if (insnp)
7066 pair2 = PENT_PAIR_PU;
7069 /* If the first insn is PU or UV pairable, search for a PV
7070 insn to go with. */
7071 if (pair2 == PENT_PAIR_NP
7072 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
7074 insnp = ix86_pent_find_pair (e_ready-1, ready,
7075 PENT_PAIR_PV, *e_ready);
7076 if (insnp)
7077 pair2 = PENT_PAIR_PV;
7080 /* If the first insn is pairable, search for a UV
7081 insn to go with. */
7082 if (pair2 == PENT_PAIR_NP)
7084 insnp = ix86_pent_find_pair (e_ready-1, ready,
7085 PENT_PAIR_UV, *e_ready);
7086 if (insnp)
7087 pair2 = PENT_PAIR_UV;
7090 if (pair2 == PENT_PAIR_NP)
7091 return;
7093 /* Found something! Decide if we need to swap the order. */
7094 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
7095 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
7096 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
7097 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
7098 ix86_reorder_insn (insnp, e_ready);
7099 else
7100 ix86_reorder_insn (insnp, e_ready - 1);
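/* Concretely: a PU insn (one that pairs only in the U pipe, e.g. a shift
   by an immediate) has to issue first, so when the head of the ready list
   is the V-pipe half of a pair the two insns are swapped above; when the
   head already fits the U pipe, the found partner is slotted in directly
   behind it.  */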
7103 static void
7104 ix86_sched_reorder_ppro (ready, e_ready)
7105 rtx *ready;
7106 rtx *e_ready;
7108 rtx decode[3];
7109 enum attr_ppro_uops cur_uops;
7110 int issued_this_cycle;
7111 rtx *insnp;
7112 int i;
7114 /* At this point .ppro.decode contains the state of the three
7115 decoders from last "cycle". That is, those insns that were
7116 actually independent. But here we're scheduling for the
7117 decoder, and we may find things that are decodable in the
7118 same cycle. */
7120 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
7121 issued_this_cycle = 0;
7123 insnp = e_ready;
7124 cur_uops = ix86_safe_ppro_uops (*insnp);
7126 /* If the decoders are empty, and we have a complex insn at the
7127 head of the priority queue, let it issue without complaint. */
7128 if (decode[0] == NULL)
7130 if (cur_uops == PPRO_UOPS_MANY)
7132 decode[0] = *insnp;
7133 goto ppro_done;
7136 /* Otherwise, search for a 2-4 uop insn to issue. */
7137 while (cur_uops != PPRO_UOPS_FEW)
7139 if (insnp == ready)
7140 break;
7141 cur_uops = ix86_safe_ppro_uops (*--insnp);
7144 /* If so, move it to the head of the line. */
7145 if (cur_uops == PPRO_UOPS_FEW)
7146 ix86_reorder_insn (insnp, e_ready);
7148 /* Issue the head of the queue. */
7149 issued_this_cycle = 1;
7150 decode[0] = *e_ready--;
7153 /* Look for simple insns to fill in the other two slots. */
7154 for (i = 1; i < 3; ++i)
7155 if (decode[i] == NULL)
7157 if (ready >= e_ready)
7158 goto ppro_done;
7160 insnp = e_ready;
7161 cur_uops = ix86_safe_ppro_uops (*insnp);
7162 while (cur_uops != PPRO_UOPS_ONE)
7164 if (insnp == ready)
7165 break;
7166 cur_uops = ix86_safe_ppro_uops (*--insnp);
7169 /* Found one. Move it to the head of the queue and issue it. */
7170 if (cur_uops == PPRO_UOPS_ONE)
7172 ix86_reorder_insn (insnp, e_ready);
7173 decode[i] = *e_ready--;
7174 issued_this_cycle++;
7175 continue;
7178 /* ??? Didn't find one. Ideally, here we would do a lazy split
7179 of 2-uop insns, issue one and queue the other. */
7182 ppro_done:
7183 if (issued_this_cycle == 0)
7184 issued_this_cycle = 1;
7185 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
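/* The slot filling above mirrors the PPro/PII 4-1-1 decode template:
   only decoder 0 can take a multi-uop insn in a given cycle, while
   decoders 1 and 2 accept single-uop insns, so the reorder tries to
   present one "few"-uop insn followed by two one-uop insns.  */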
7188 /* We are about to begin issuing insns for this clock cycle.
7189 Override the default sort algorithm to better slot instructions. */
7190 int
7191 ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
7192 FILE *dump ATTRIBUTE_UNUSED;
7193 int sched_verbose ATTRIBUTE_UNUSED;
7194 rtx *ready;
7195 int n_ready;
7196 int clock_var ATTRIBUTE_UNUSED;
7198 rtx *e_ready = ready + n_ready - 1;
7200 if (n_ready < 2)
7201 goto out;
7203 switch (ix86_cpu)
7205 default:
7206 break;
7208 case PROCESSOR_PENTIUM:
7209 ix86_sched_reorder_pentium (ready, e_ready);
7210 break;
7212 case PROCESSOR_PENTIUMPRO:
7213 ix86_sched_reorder_ppro (ready, e_ready);
7214 break;
7217 out:
7218 return ix86_issue_rate ();
7221 /* We are about to issue INSN. Return the number of insns left on the
7222 ready queue that can be issued this cycle. */
7224 int
7225 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
7226 FILE *dump;
7227 int sched_verbose;
7228 rtx insn;
7229 int can_issue_more;
7231 int i;
7232 switch (ix86_cpu)
7234 default:
7235 return can_issue_more - 1;
7237 case PROCESSOR_PENTIUMPRO:
7239 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
7241 if (uops == PPRO_UOPS_MANY)
7243 if (sched_verbose)
7244 ix86_dump_ppro_packet (dump);
7245 ix86_sched_data.ppro.decode[0] = insn;
7246 ix86_sched_data.ppro.decode[1] = NULL;
7247 ix86_sched_data.ppro.decode[2] = NULL;
7248 if (sched_verbose)
7249 ix86_dump_ppro_packet (dump);
7250 ix86_sched_data.ppro.decode[0] = NULL;
7252 else if (uops == PPRO_UOPS_FEW)
7254 if (sched_verbose)
7255 ix86_dump_ppro_packet (dump);
7256 ix86_sched_data.ppro.decode[0] = insn;
7257 ix86_sched_data.ppro.decode[1] = NULL;
7258 ix86_sched_data.ppro.decode[2] = NULL;
7260 else
7262 for (i = 0; i < 3; ++i)
7263 if (ix86_sched_data.ppro.decode[i] == NULL)
7265 ix86_sched_data.ppro.decode[i] = insn;
7266 break;
7268 if (i == 3)
7269 abort ();
7270 if (i == 2)
7272 if (sched_verbose)
7273 ix86_dump_ppro_packet (dump);
7274 ix86_sched_data.ppro.decode[0] = NULL;
7275 ix86_sched_data.ppro.decode[1] = NULL;
7276 ix86_sched_data.ppro.decode[2] = NULL;
7280 return --ix86_sched_data.ppro.issued_this_cycle;
7284 /* Compute the alignment given to a constant that is being placed in memory.
7285 EXP is the constant and ALIGN is the alignment that the object would
7286 ordinarily have.
7287 The value of this function is used instead of that alignment to align
7288 the object. */
7290 int
7291 ix86_constant_alignment (exp, align)
7292 tree exp;
7293 int align;
7295 if (TREE_CODE (exp) == REAL_CST)
7297 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
7298 return 64;
7299 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
7300 return 128;
7302 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
7303 && align < 256)
7304 return 256;
7306 return align;
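/* For instance, under the rules above a string constant of 31 or more
   characters is aligned to 256 bits (32 bytes), so word-at-a-time
   scanning loops like the unrolled strlen above start out aligned, and
   a DFmode constant gets 64-bit alignment so FP loads of it are never
   split across cache lines.  */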
7309 /* Compute the alignment for a static variable.
7310 TYPE is the data type, and ALIGN is the alignment that
7311 the object would ordinarily have. The value of this function is used
7312 instead of that alignment to align the object. */
7314 int
7315 ix86_data_alignment (type, align)
7316 tree type;
7317 int align;
7319 if (AGGREGATE_TYPE_P (type)
7320 && TYPE_SIZE (type)
7321 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
7322 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
7323 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
7324 return 256;
7326 if (TREE_CODE (type) == ARRAY_TYPE)
7328 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7329 return 64;
7330 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7331 return 128;
7333 else if (TREE_CODE (type) == COMPLEX_TYPE)
7336 if (TYPE_MODE (type) == DCmode && align < 64)
7337 return 64;
7338 if (TYPE_MODE (type) == XCmode && align < 128)
7339 return 128;
7341 else if ((TREE_CODE (type) == RECORD_TYPE
7342 || TREE_CODE (type) == UNION_TYPE
7343 || TREE_CODE (type) == QUAL_UNION_TYPE)
7344 && TYPE_FIELDS (type))
7346 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7347 return 64;
7348 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7349 return 128;
7351 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7352 || TREE_CODE (type) == INTEGER_TYPE)
7354 if (TYPE_MODE (type) == DFmode && align < 64)
7355 return 64;
7356 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7357 return 128;
7360 return align;
7363 /* Compute the alignment for a local variable.
7364 TYPE is the data type, and ALIGN is the alignment that
7365 the object would ordinarily have. The value of this macro is used
7366 instead of that alignment to align the object. */
7368 int
7369 ix86_local_alignment (type, align)
7370 tree type;
7371 int align;
7373 if (TREE_CODE (type) == ARRAY_TYPE)
7375 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7376 return 64;
7377 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7378 return 128;
7380 else if (TREE_CODE (type) == COMPLEX_TYPE)
7382 if (TYPE_MODE (type) == DCmode && align < 64)
7383 return 64;
7384 if (TYPE_MODE (type) == XCmode && align < 128)
7385 return 128;
7387 else if ((TREE_CODE (type) == RECORD_TYPE
7388 || TREE_CODE (type) == UNION_TYPE
7389 || TREE_CODE (type) == QUAL_UNION_TYPE)
7390 && TYPE_FIELDS (type))
7392 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7393 return 64;
7394 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7395 return 128;
7397 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7398 || TREE_CODE (type) == INTEGER_TYPE)
7401 if (TYPE_MODE (type) == DFmode && align < 64)
7402 return 64;
7403 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7404 return 128;
7406 return align;
7409 #define def_builtin(NAME, TYPE, CODE) \
7410 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL_PTR)
7411 struct builtin_description
7413 enum insn_code icode;
7414 const char * name;
7415 enum ix86_builtins code;
7416 enum rtx_code comparison;
7417 unsigned int flag;
7420 static struct builtin_description bdesc_comi[] =
7422 { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
7423 { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
7424 { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
7425 { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
7426 { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
7427 { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
7428 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
7429 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
7430 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
7431 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
7432 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
7433 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
7436 static struct builtin_description bdesc_2arg[] =
7438 /* SSE */
7439 { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
7440 { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
7441 { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
7442 { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
7443 { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
7444 { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
7445 { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
7446 { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
7448 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
7449 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
7450 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
7451 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
7452 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
7453 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
7454 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
7455 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
7456 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
7457 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
7458 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
7459 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
7460 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
7461 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
7462 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
7463 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
7464 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
7465 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
7466 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
7467 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
7468 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
7469 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
7470 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
7471 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
7473 { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
7474 { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
7475 { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
7476 { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
7478 { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
7479 { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
7480 { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
7481 { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
7483 { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
7484 { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
7485 { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
7486 { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
7487 { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
7489 /* MMX */
7490 { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
7491 { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
7492 { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
7493 { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
7494 { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
7495 { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
7497 { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
7498 { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
7499 { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
7500 { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
7501 { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
7502 { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
7503 { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
7504 { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
7506 { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
7507 { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
7508 { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
7510 { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
7511 { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
7512 { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
7513 { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
7515 { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
7516 { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
7518 { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
7519 { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
7520 { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
7521 { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
7522 { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
7523 { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
7525 { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
7526 { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
7527 { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
7528 { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
7530 { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
7531 { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
7532 { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
7533 { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
7534 { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
7535 { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
7537 /* Special. */
7538 { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
7539 { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
7540 { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
7542 { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
7543 { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
7545 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
7546 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
7547 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
7548 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
7549 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
7550 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
7552 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
7553 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
7554 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
7555 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
7556 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
7557 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
7559 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
7560 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
7561 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
7562 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
7564 { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
7565 { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
7569 static struct builtin_description bdesc_1arg[] =
7571 { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
7572 { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
7574 { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
7575 { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
7576 { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
7578 { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
7579 { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
7580 { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
7581 { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
7585 /* Set up all the target-specific builtins. This is not called if TARGET_MMX
7586 is zero. Otherwise, if TARGET_SSE is not set, only define the MMX
7587 builtins. */
7588 void
7589 ix86_init_builtins ()
7591 struct builtin_description * d;
7592 size_t i;
7593 tree endlink = void_list_node;
7595 tree pchar_type_node = build_pointer_type (char_type_node);
7596 tree pfloat_type_node = build_pointer_type (float_type_node);
7597 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
7598 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
7600 /* Comparisons. */
7601 tree int_ftype_v4sf_v4sf
7602 = build_function_type (integer_type_node,
7603 tree_cons (NULL_TREE, V4SF_type_node,
7604 tree_cons (NULL_TREE,
7605 V4SF_type_node,
7606 endlink)));
7607 tree v4si_ftype_v4sf_v4sf
7608 = build_function_type (V4SI_type_node,
7609 tree_cons (NULL_TREE, V4SF_type_node,
7610 tree_cons (NULL_TREE,
7611 V4SF_type_node,
7612 endlink)));
7613 /* MMX/SSE/integer conversions. */
7614 tree int_ftype_v4sf_int
7615 = build_function_type (integer_type_node,
7616 tree_cons (NULL_TREE, V4SF_type_node,
7617 tree_cons (NULL_TREE,
7618 integer_type_node,
7619 endlink)));
7620 tree int_ftype_v4sf
7621 = build_function_type (integer_type_node,
7622 tree_cons (NULL_TREE, V4SF_type_node,
7623 endlink));
7624 tree int_ftype_v8qi
7625 = build_function_type (integer_type_node,
7626 tree_cons (NULL_TREE, V8QI_type_node,
7627 endlink));
7628 tree int_ftype_v2si
7629 = build_function_type (integer_type_node,
7630 tree_cons (NULL_TREE, V2SI_type_node,
7631 endlink));
7632 tree v2si_ftype_int
7633 = build_function_type (V2SI_type_node,
7634 tree_cons (NULL_TREE, integer_type_node,
7635 endlink));
7636 tree v4sf_ftype_v4sf_int
7637 = build_function_type (V4SF_type_node,
7638 tree_cons (NULL_TREE, V4SF_type_node,
7639 tree_cons (NULL_TREE, integer_type_node,
7640 endlink)));
7641 tree v4sf_ftype_v4sf_v2si
7642 = build_function_type (V4SF_type_node,
7643 tree_cons (NULL_TREE, V4SF_type_node,
7644 tree_cons (NULL_TREE, V2SI_type_node,
7645 endlink)));
7646 tree int_ftype_v4hi_int
7647 = build_function_type (integer_type_node,
7648 tree_cons (NULL_TREE, V4HI_type_node,
7649 tree_cons (NULL_TREE, integer_type_node,
7650 endlink)));
7651 tree v4hi_ftype_v4hi_int_int
7652 = build_function_type (V4HI_type_node,
7653 tree_cons (NULL_TREE, V4HI_type_node,
7654 tree_cons (NULL_TREE, integer_type_node,
7655 tree_cons (NULL_TREE,
7656 integer_type_node,
7657 endlink))));
7658 /* Miscellaneous. */
7659 tree v8qi_ftype_v4hi_v4hi
7660 = build_function_type (V8QI_type_node,
7661 tree_cons (NULL_TREE, V4HI_type_node,
7662 tree_cons (NULL_TREE, V4HI_type_node,
7663 endlink)));
7664 tree v4hi_ftype_v2si_v2si
7665 = build_function_type (V4HI_type_node,
7666 tree_cons (NULL_TREE, V2SI_type_node,
7667 tree_cons (NULL_TREE, V2SI_type_node,
7668 endlink)));
7669 tree v4sf_ftype_v4sf_v4sf_int
7670 = build_function_type (V4SF_type_node,
7671 tree_cons (NULL_TREE, V4SF_type_node,
7672 tree_cons (NULL_TREE, V4SF_type_node,
7673 tree_cons (NULL_TREE,
7674 integer_type_node,
7675 endlink))));
7676 tree v4hi_ftype_v8qi_v8qi
7677 = build_function_type (V4HI_type_node,
7678 tree_cons (NULL_TREE, V8QI_type_node,
7679 tree_cons (NULL_TREE, V8QI_type_node,
7680 endlink)));
7681 tree v2si_ftype_v4hi_v4hi
7682 = build_function_type (V2SI_type_node,
7683 tree_cons (NULL_TREE, V4HI_type_node,
7684 tree_cons (NULL_TREE, V4HI_type_node,
7685 endlink)));
7686 tree v4hi_ftype_v4hi_int
7687 = build_function_type (V4HI_type_node,
7688 tree_cons (NULL_TREE, V4HI_type_node,
7689 tree_cons (NULL_TREE, integer_type_node,
7690 endlink)));
7691 tree di_ftype_di_int
7692 = build_function_type (long_long_unsigned_type_node,
7693 tree_cons (NULL_TREE, long_long_unsigned_type_node,
7694 tree_cons (NULL_TREE, integer_type_node,
7695 endlink)));
7696 tree v8qi_ftype_v8qi_di
7697 = build_function_type (V8QI_type_node,
7698 tree_cons (NULL_TREE, V8QI_type_node,
7699 tree_cons (NULL_TREE,
7700 long_long_integer_type_node,
7701 endlink)));
7702 tree v4hi_ftype_v4hi_di
7703 = build_function_type (V4HI_type_node,
7704 tree_cons (NULL_TREE, V4HI_type_node,
7705 tree_cons (NULL_TREE,
7706 long_long_integer_type_node,
7707 endlink)));
7708 tree v2si_ftype_v2si_di
7709 = build_function_type (V2SI_type_node,
7710 tree_cons (NULL_TREE, V2SI_type_node,
7711 tree_cons (NULL_TREE,
7712 long_long_integer_type_node,
7713 endlink)));
7714 tree void_ftype_void
7715 = build_function_type (void_type_node, endlink);
7716 tree void_ftype_pchar_int
7717 = build_function_type (void_type_node,
7718 tree_cons (NULL_TREE, pchar_type_node,
7719 tree_cons (NULL_TREE, integer_type_node,
7720 endlink)));
7721 tree void_ftype_unsigned
7722 = build_function_type (void_type_node,
7723 tree_cons (NULL_TREE, unsigned_type_node,
7724 endlink));
7725 tree unsigned_ftype_void
7726 = build_function_type (unsigned_type_node, endlink);
7727 tree di_ftype_void
7728 = build_function_type (long_long_unsigned_type_node, endlink);
7729 tree ti_ftype_void
7730 = build_function_type (intTI_type_node, endlink);
7731 tree v2si_ftype_v4sf
7732 = build_function_type (V2SI_type_node,
7733 tree_cons (NULL_TREE, V4SF_type_node,
7734 endlink));
7735 /* Loads/stores. */
7736 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
7737 tree_cons (NULL_TREE, V8QI_type_node,
7738 tree_cons (NULL_TREE,
7739 pchar_type_node,
7740 endlink)));
7741 tree void_ftype_v8qi_v8qi_pchar
7742 = build_function_type (void_type_node, maskmovq_args);
7743 tree v4sf_ftype_pfloat
7744 = build_function_type (V4SF_type_node,
7745 tree_cons (NULL_TREE, pfloat_type_node,
7746 endlink));
7747 tree v4sf_ftype_float
7748 = build_function_type (V4SF_type_node,
7749 tree_cons (NULL_TREE, float_type_node,
7750 endlink));
7751 tree v4sf_ftype_float_float_float_float
7752 = build_function_type (V4SF_type_node,
7753 tree_cons (NULL_TREE, float_type_node,
7754 tree_cons (NULL_TREE, float_type_node,
7755 tree_cons (NULL_TREE,
7756 float_type_node,
7757 tree_cons (NULL_TREE,
7758 float_type_node,
7759 endlink)))));
7760 /* @@@ the type is bogus */
7761 tree v4sf_ftype_v4sf_pv2si
7762 = build_function_type (V4SF_type_node,
7763 tree_cons (NULL_TREE, V4SF_type_node,
7764 tree_cons (NULL_TREE, pv2si_type_node,
7765 endlink)));
7766 tree v4sf_ftype_pv2si_v4sf
7767 = build_function_type (V4SF_type_node,
7768 tree_cons (NULL_TREE, pv2si_type_node,
7769 tree_cons (NULL_TREE, V4SF_type_node,
7770 endlink)));
7771 tree void_ftype_pfloat_v4sf
7772 = build_function_type (void_type_node,
7773 tree_cons (NULL_TREE, pfloat_type_node,
7774 tree_cons (NULL_TREE, V4SF_type_node,
7775 endlink)));
7776 tree void_ftype_pdi_di
7777 = build_function_type (void_type_node,
7778 tree_cons (NULL_TREE, pdi_type_node,
7779 tree_cons (NULL_TREE,
7780 long_long_unsigned_type_node,
7781 endlink)));
7782 /* Normal vector unops. */
7783 tree v4sf_ftype_v4sf
7784 = build_function_type (V4SF_type_node,
7785 tree_cons (NULL_TREE, V4SF_type_node,
7786 endlink));
7788 /* Normal vector binops. */
7789 tree v4sf_ftype_v4sf_v4sf
7790 = build_function_type (V4SF_type_node,
7791 tree_cons (NULL_TREE, V4SF_type_node,
7792 tree_cons (NULL_TREE, V4SF_type_node,
7793 endlink)));
7794 tree v8qi_ftype_v8qi_v8qi
7795 = build_function_type (V8QI_type_node,
7796 tree_cons (NULL_TREE, V8QI_type_node,
7797 tree_cons (NULL_TREE, V8QI_type_node,
7798 endlink)));
7799 tree v4hi_ftype_v4hi_v4hi
7800 = build_function_type (V4HI_type_node,
7801 tree_cons (NULL_TREE, V4HI_type_node,
7802 tree_cons (NULL_TREE, V4HI_type_node,
7803 endlink)));
7804 tree v2si_ftype_v2si_v2si
7805 = build_function_type (V2SI_type_node,
7806 tree_cons (NULL_TREE, V2SI_type_node,
7807 tree_cons (NULL_TREE, V2SI_type_node,
7808 endlink)));
7809 tree ti_ftype_ti_ti
7810 = build_function_type (intTI_type_node,
7811 tree_cons (NULL_TREE, intTI_type_node,
7812 tree_cons (NULL_TREE, intTI_type_node,
7813 endlink)));
7814 tree di_ftype_di_di
7815 = build_function_type (long_long_unsigned_type_node,
7816 tree_cons (NULL_TREE, long_long_unsigned_type_node,
7817 tree_cons (NULL_TREE,
7818 long_long_unsigned_type_node,
7819 endlink)));
7821 /* Add all builtins that are more or less simple operations on two
7822 operands. */
7823 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
7825 /* Use one of the operands; the target can have a different mode for
7826 mask-generating compares. */
7827 enum machine_mode mode;
7828 tree type;
7830 if (d->name == 0)
7831 continue;
7832 mode = insn_data[d->icode].operand[1].mode;
7834 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
7835 continue;
7837 switch (mode)
7839 case V4SFmode:
7840 type = v4sf_ftype_v4sf_v4sf;
7841 break;
7842 case V8QImode:
7843 type = v8qi_ftype_v8qi_v8qi;
7844 break;
7845 case V4HImode:
7846 type = v4hi_ftype_v4hi_v4hi;
7847 break;
7848 case V2SImode:
7849 type = v2si_ftype_v2si_v2si;
7850 break;
7851 case TImode:
7852 type = ti_ftype_ti_ti;
7853 break;
7854 case DImode:
7855 type = di_ftype_di_di;
7856 break;
7858 default:
7859 abort ();
7862 /* Override for comparisons. */
7863 if (d->icode == CODE_FOR_maskcmpv4sf3
7864 || d->icode == CODE_FOR_maskncmpv4sf3
7865 || d->icode == CODE_FOR_vmmaskcmpv4sf3
7866 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
7867 type = v4si_ftype_v4sf_v4sf;
7869 def_builtin (d->name, type, d->code);
7872 /* Add the remaining MMX insns with somewhat more complicated types. */
7873 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
7874 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
7875 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
7876 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
7877 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
7878 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
7879 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
7880 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
7881 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
7883 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
7884 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
7885 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
7887 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
7888 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
7890 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
7891 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
7893 /* Everything beyond this point is SSE only. */
7894 if (! TARGET_SSE)
7895 return;
7897 /* comi/ucomi insns. */
7898 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
7899 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
7901 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
7902 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
7903 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
7905 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
7906 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
7907 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
7908 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
7909 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
7910 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
7912 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
7913 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
7915 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
7917 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
7918 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
7919 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
7920 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
7921 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
7922 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
7924 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
7925 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
7926 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
7927 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
7929 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
7930 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
7931 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
7932 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
7934 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
7935 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
7937 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
7939 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
7940 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
7941 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
7942 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
7943 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
7944 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
7946 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
7948 /* Composite intrinsics. */
7949 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
7950 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
7951 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
7952 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
7953 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
7954 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
7955 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
7958 /* Errors in the source file can cause expand_expr to return const0_rtx
7959 where we expect a vector. To avoid crashing, use one of the vector
7960 clear instructions. */
7961 static rtx
7962 safe_vector_operand (x, mode)
7963 rtx x;
7964 enum machine_mode mode;
7966 if (x != const0_rtx)
7967 return x;
7968 x = gen_reg_rtx (mode);
7970 if (VALID_MMX_REG_MODE (mode))
7971 emit_insn (gen_mmx_clrdi (mode == DImode ? x
7972 : gen_rtx_SUBREG (DImode, x, 0)));
7973 else
7974 emit_insn (gen_sse_clrti (mode == TImode ? x
7975 : gen_rtx_SUBREG (TImode, x, 0)));
7976 return x;
7979 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
7981 static rtx
7982 ix86_expand_binop_builtin (icode, arglist, target)
7983 enum insn_code icode;
7984 tree arglist;
7985 rtx target;
7987 rtx pat;
7988 tree arg0 = TREE_VALUE (arglist);
7989 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7990 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7991 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7992 enum machine_mode tmode = insn_data[icode].operand[0].mode;
7993 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
7994 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
7996 if (VECTOR_MODE_P (mode0))
7997 op0 = safe_vector_operand (op0, mode0);
7998 if (VECTOR_MODE_P (mode1))
7999 op1 = safe_vector_operand (op1, mode1);
8001 if (! target
8002 || GET_MODE (target) != tmode
8003 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8004 target = gen_reg_rtx (tmode);
8006 /* In case the insn wants input operands in modes different from
8007 the result, abort. */
8008 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
8009 abort ();
8011 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8012 op0 = copy_to_mode_reg (mode0, op0);
8013 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8014 op1 = copy_to_mode_reg (mode1, op1);
8016 pat = GEN_FCN (icode) (target, op0, op1);
8017 if (! pat)
8018 return 0;
8019 emit_insn (pat);
8020 return target;
8021 }
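/* Editor's note (non-build sketch): regular two-operand builtins reach the
   helper above through the bdesc_2arg table; for example, assuming the
   usual table entry pairing IX86_BUILTIN_ADDPS with CODE_FOR_addv4sf3,

     __builtin_ia32_addps (a, b)

   expands to a single (set (reg:V4SF) (plus:V4SF a b)) insn.  */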
8023 /* Subroutine of ix86_expand_builtin to take care of stores. */
8025 static rtx
8026 ix86_expand_store_builtin (icode, arglist, shuffle)
8027 enum insn_code icode;
8028 tree arglist;
8029 int shuffle;
8030 {
8031 rtx pat;
8032 tree arg0 = TREE_VALUE (arglist);
8033 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8034 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8035 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8036 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
8037 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
8039 if (VECTOR_MODE_P (mode1))
8040 op1 = safe_vector_operand (op1, mode1);
8042 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8043 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8044 op1 = copy_to_mode_reg (mode1, op1);
8045 if (shuffle >= 0)
8046 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
8047 pat = GEN_FCN (icode) (op0, op1);
8048 if (pat)
8049 emit_insn (pat);
8050 return 0;
8051 }
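/* Editor's note (non-build sketch): a nonnegative SHUFFLE asks for a
   shufps of the value with itself before the store.  This is how the
   composite stores below are built from a plain movaps: storeps1 passes 0
   (splat element 0 across the vector) and storerps passes 0x1b (reverse
   the four elements); ordinary stores pass -1 to skip the shuffle.  */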
8053 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
8055 static rtx
8056 ix86_expand_unop_builtin (icode, arglist, target, do_load)
8057 enum insn_code icode;
8058 tree arglist;
8059 rtx target;
8060 int do_load;
8061 {
8062 rtx pat;
8063 tree arg0 = TREE_VALUE (arglist);
8064 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8065 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8066 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8068 if (! target
8069 || GET_MODE (target) != tmode
8070 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8071 target = gen_reg_rtx (tmode);
8072 if (do_load)
8073 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8074 else
8075 {
8076 if (VECTOR_MODE_P (mode0))
8077 op0 = safe_vector_operand (op0, mode0);
8079 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8080 op0 = copy_to_mode_reg (mode0, op0);
8081 }
8083 pat = GEN_FCN (icode) (target, op0);
8084 if (! pat)
8085 return 0;
8086 emit_insn (pat);
8087 return target;
8088 }
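/* Editor's note (non-build sketch): DO_LOAD distinguishes value operands
   from pointer operands.  With DO_LOAD set, the argument is treated as an
   address and wrapped in a MEM, so __builtin_ia32_loadaps (p) becomes a
   V4SF load through the movaps pattern; with it clear, the argument is the
   operand itself and only needs the vector-clear guard and a predicate
   check.  */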
8090 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
8091 sqrtss, rsqrtss, rcpss. */
8093 static rtx
8094 ix86_expand_unop1_builtin (icode, arglist, target)
8095 enum insn_code icode;
8096 tree arglist;
8097 rtx target;
8098 {
8099 rtx pat;
8100 tree arg0 = TREE_VALUE (arglist);
8101 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8102 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8103 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8105 if (! target
8106 || GET_MODE (target) != tmode
8107 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8108 target = gen_reg_rtx (tmode);
8110 if (VECTOR_MODE_P (mode0))
8111 op0 = safe_vector_operand (op0, mode0);
8113 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8114 op0 = copy_to_mode_reg (mode0, op0);
8116 pat = GEN_FCN (icode) (target, op0, op0);
8117 if (! pat)
8118 return 0;
8119 emit_insn (pat);
8120 return target;
8121 }
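/* Editor's note (non-build sketch): the source operand is passed twice
   because the vm* scalar patterns compute the operation on element 0 and
   merge the remaining elements from a second vector input.  Reusing op0
   for both means __builtin_ia32_sqrtss (a) replaces a[0] with sqrt(a[0])
   while leaving a[1..3] intact, matching the hardware sqrtss behavior.  */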
8123 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
8125 static rtx
8126 ix86_expand_sse_compare (d, arglist, target)
8127 struct builtin_description *d;
8128 tree arglist;
8129 rtx target;
8130 {
8131 rtx pat;
8132 tree arg0 = TREE_VALUE (arglist);
8133 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8134 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8135 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8136 rtx op2;
8137 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
8138 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
8139 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
8140 enum rtx_code comparison = d->comparison;
8142 if (VECTOR_MODE_P (mode0))
8143 op0 = safe_vector_operand (op0, mode0);
8144 if (VECTOR_MODE_P (mode1))
8145 op1 = safe_vector_operand (op1, mode1);
8147 /* Swap operands if we have a comparison that isn't available in
8148 hardware. */
8149 if (d->flag)
8150 {
8151 target = gen_reg_rtx (tmode);
8152 emit_move_insn (target, op1);
8153 op1 = op0;
8154 op0 = target;
8155 comparison = swap_condition (comparison);
8156 }
8157 else if (! target
8158 || GET_MODE (target) != tmode
8159 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
8160 target = gen_reg_rtx (tmode);
8162 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
8163 op0 = copy_to_mode_reg (mode0, op0);
8164 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
8165 op1 = copy_to_mode_reg (mode1, op1);
8167 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
8168 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
8169 if (! pat)
8170 return 0;
8171 emit_insn (pat);
8172 return target;
8173 }
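/* Editor's note (non-build sketch): d->flag marks predicates the hardware
   cannot encode directly.  A "greater than" compare, for instance, is
   emitted as the mirrored "less than": the operands are exchanged and the
   comparison code swapped, so __builtin_ia32_cmpgtps (a, b) ends up as a
   cmpltps with b as the first operand.  The copy of op1 into a fresh
   target is needed because operand 0 and operand 1 of the compare pattern
   share a register.  */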
8175 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
8177 static rtx
8178 ix86_expand_sse_comi (d, arglist, target)
8179 struct builtin_description *d;
8180 tree arglist;
8181 rtx target;
8182 {
8183 rtx pat;
8184 tree arg0 = TREE_VALUE (arglist);
8185 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8186 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8187 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8188 rtx op2;
8189 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
8190 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
8191 enum rtx_code comparison = d->comparison;
8193 if (VECTOR_MODE_P (mode0))
8194 op0 = safe_vector_operand (op0, mode0);
8195 if (VECTOR_MODE_P (mode1))
8196 op1 = safe_vector_operand (op1, mode1);
8198 /* Swap operands if we have a comparison that isn't available in
8199 hardware. */
8200 if (d->flag)
8201 {
8202 rtx tmp = op1;
8203 op1 = op0;
8204 op0 = tmp;
8205 comparison = swap_condition (comparison);
8206 }
8208 target = gen_reg_rtx (SImode);
8209 emit_move_insn (target, const0_rtx);
8210 target = gen_rtx_SUBREG (QImode, target, 0);
8212 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
8213 op0 = copy_to_mode_reg (mode0, op0);
8214 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
8215 op1 = copy_to_mode_reg (mode1, op1);
8217 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
8218 pat = GEN_FCN (d->icode) (op0, op1, op2);
8219 if (! pat)
8220 return 0;
8221 emit_insn (pat);
8222 emit_insn (gen_setcc_2 (target, op2));
8224 return target;
8225 }
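/* Editor's note (non-build sketch): comiss sets EFLAGS rather than
   producing a mask, so the helper above zeroes an SImode register and
   uses a setcc into its QImode low part to materialize the 0/1 result;
   builtins such as __builtin_ia32_comigt are handled with the same
   operand swap as the packed compares.  */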
8227 /* Expand an expression EXP that calls a built-in function,
8228 with result going to TARGET if that's convenient
8229 (and in mode MODE if that's convenient).
8230 SUBTARGET may be used as the target for computing one of EXP's operands.
8231 IGNORE is nonzero if the value is to be ignored. */
8233 rtx
8234 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
8235 tree exp;
8236 rtx target;
8237 rtx subtarget ATTRIBUTE_UNUSED;
8238 enum machine_mode mode ATTRIBUTE_UNUSED;
8239 int ignore ATTRIBUTE_UNUSED;
8240 {
8241 struct builtin_description *d;
8242 size_t i;
8243 enum insn_code icode;
8244 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8245 tree arglist = TREE_OPERAND (exp, 1);
8246 tree arg0, arg1, arg2, arg3;
8247 rtx op0, op1, op2, pat;
8248 enum machine_mode tmode, mode0, mode1, mode2;
8249 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8251 switch (fcode)
8252 {
8253 case IX86_BUILTIN_EMMS:
8254 emit_insn (gen_emms ());
8255 return 0;
8257 case IX86_BUILTIN_SFENCE:
8258 emit_insn (gen_sfence ());
8259 return 0;
8261 case IX86_BUILTIN_M_FROM_INT:
8262 target = gen_reg_rtx (DImode);
8263 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8264 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
8265 return target;
8267 case IX86_BUILTIN_M_TO_INT:
8268 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8269 op0 = copy_to_mode_reg (DImode, op0);
8270 target = gen_reg_rtx (SImode);
8271 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
8272 return target;
8274 case IX86_BUILTIN_PEXTRW:
8275 icode = CODE_FOR_mmx_pextrw;
8276 arg0 = TREE_VALUE (arglist);
8277 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8278 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8279 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8280 tmode = insn_data[icode].operand[0].mode;
8281 mode0 = insn_data[icode].operand[1].mode;
8282 mode1 = insn_data[icode].operand[2].mode;
8284 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8285 op0 = copy_to_mode_reg (mode0, op0);
8286 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8287 {
8288 /* @@@ better error message */
8289 error ("selector must be an immediate");
8290 return const0_rtx;
8291 }
8292 if (target == 0
8293 || GET_MODE (target) != tmode
8294 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8295 target = gen_reg_rtx (tmode);
8296 pat = GEN_FCN (icode) (target, op0, op1);
8297 if (! pat)
8298 return 0;
8299 emit_insn (pat);
8300 return target;
8302 case IX86_BUILTIN_PINSRW:
8303 icode = CODE_FOR_mmx_pinsrw;
8304 arg0 = TREE_VALUE (arglist);
8305 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8306 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8307 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8308 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8309 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8310 tmode = insn_data[icode].operand[0].mode;
8311 mode0 = insn_data[icode].operand[1].mode;
8312 mode1 = insn_data[icode].operand[2].mode;
8313 mode2 = insn_data[icode].operand[3].mode;
8315 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8316 op0 = copy_to_mode_reg (mode0, op0);
8317 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8318 op1 = copy_to_mode_reg (mode1, op1);
8319 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8320 {
8321 /* @@@ better error message */
8322 error ("selector must be an immediate");
8323 return const0_rtx;
8324 }
8325 if (target == 0
8326 || GET_MODE (target) != tmode
8327 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8328 target = gen_reg_rtx (tmode);
8329 pat = GEN_FCN (icode) (target, op0, op1, op2);
8330 if (! pat)
8331 return 0;
8332 emit_insn (pat);
8333 return target;
8335 case IX86_BUILTIN_MASKMOVQ:
8336 icode = CODE_FOR_mmx_maskmovq;
8337 /* Note the arg order is different from the operand order. */
8338 arg1 = TREE_VALUE (arglist);
8339 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
8340 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8341 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8342 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8343 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8344 mode0 = insn_data[icode].operand[0].mode;
8345 mode1 = insn_data[icode].operand[1].mode;
8346 mode2 = insn_data[icode].operand[2].mode;
8348 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
8349 op0 = copy_to_mode_reg (mode0, op0);
8350 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8351 op1 = copy_to_mode_reg (mode1, op1);
8352 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
8353 op2 = copy_to_mode_reg (mode2, op2);
8354 pat = GEN_FCN (icode) (op0, op1, op2);
8355 if (! pat)
8356 return 0;
8357 emit_insn (pat);
8358 return 0;
8360 case IX86_BUILTIN_SQRTSS:
8361 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
8362 case IX86_BUILTIN_RSQRTSS:
8363 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
8364 case IX86_BUILTIN_RCPSS:
8365 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
8367 case IX86_BUILTIN_LOADAPS:
8368 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
8370 case IX86_BUILTIN_LOADUPS:
8371 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
8373 case IX86_BUILTIN_STOREAPS:
8374 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
8375 case IX86_BUILTIN_STOREUPS:
8376 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
8378 case IX86_BUILTIN_LOADSS:
8379 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
8381 case IX86_BUILTIN_STORESS:
8382 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
8384 case IX86_BUILTIN_LOADHPS:
8385 case IX86_BUILTIN_LOADLPS:
8386 icode = (fcode == IX86_BUILTIN_LOADHPS
8387 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8388 arg0 = TREE_VALUE (arglist);
8389 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8390 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8391 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8392 tmode = insn_data[icode].operand[0].mode;
8393 mode0 = insn_data[icode].operand[1].mode;
8394 mode1 = insn_data[icode].operand[2].mode;
8396 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8397 op0 = copy_to_mode_reg (mode0, op0);
8398 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
8399 if (target == 0
8400 || GET_MODE (target) != tmode
8401 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8402 target = gen_reg_rtx (tmode);
8403 pat = GEN_FCN (icode) (target, op0, op1);
8404 if (! pat)
8405 return 0;
8406 emit_insn (pat);
8407 return target;
8409 case IX86_BUILTIN_STOREHPS:
8410 case IX86_BUILTIN_STORELPS:
8411 icode = (fcode == IX86_BUILTIN_STOREHPS
8412 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8413 arg0 = TREE_VALUE (arglist);
8414 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8415 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8416 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8417 mode0 = insn_data[icode].operand[1].mode;
8418 mode1 = insn_data[icode].operand[2].mode;
8420 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8421 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8422 op1 = copy_to_mode_reg (mode1, op1);
8424 pat = GEN_FCN (icode) (op0, op0, op1);
8425 if (! pat)
8426 return 0;
8427 emit_insn (pat);
8428 return 0;
8430 case IX86_BUILTIN_MOVNTPS:
8431 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
8432 case IX86_BUILTIN_MOVNTQ:
8433 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
8435 case IX86_BUILTIN_LDMXCSR:
8436 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8437 target = assign_386_stack_local (SImode, 0);
8438 emit_move_insn (target, op0);
8439 emit_insn (gen_ldmxcsr (target));
8440 return 0;
8442 case IX86_BUILTIN_STMXCSR:
8443 target = assign_386_stack_local (SImode, 0);
8444 emit_insn (gen_stmxcsr (target));
8445 return copy_to_mode_reg (SImode, target);
8447 case IX86_BUILTIN_PREFETCH:
8448 icode = CODE_FOR_prefetch;
8449 arg0 = TREE_VALUE (arglist);
8450 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8451 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8452 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8453 mode0 = insn_data[icode].operand[0].mode;
8454 mode1 = insn_data[icode].operand[1].mode;
8456 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8457 {
8458 /* @@@ better error message */
8459 error ("selector must be an immediate");
8460 return const0_rtx;
8461 }
8463 op0 = copy_to_mode_reg (Pmode, op0);
8464 pat = GEN_FCN (icode) (op0, op1);
8465 if (! pat)
8466 return 0;
8467 emit_insn (pat);
8468 return target;
8470 case IX86_BUILTIN_SHUFPS:
8471 icode = CODE_FOR_sse_shufps;
8472 arg0 = TREE_VALUE (arglist);
8473 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8474 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8475 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8476 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8477 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8478 tmode = insn_data[icode].operand[0].mode;
8479 mode0 = insn_data[icode].operand[1].mode;
8480 mode1 = insn_data[icode].operand[2].mode;
8481 mode2 = insn_data[icode].operand[3].mode;
8483 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8484 op0 = copy_to_mode_reg (mode0, op0);
8485 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8486 op1 = copy_to_mode_reg (mode1, op1);
8487 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8488 {
8489 /* @@@ better error message */
8490 error ("mask must be an immediate");
8491 return const0_rtx;
8492 }
8493 if (target == 0
8494 || GET_MODE (target) != tmode
8495 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8496 target = gen_reg_rtx (tmode);
8497 pat = GEN_FCN (icode) (target, op0, op1, op2);
8498 if (! pat)
8499 return 0;
8500 emit_insn (pat);
8501 return target;
8503 case IX86_BUILTIN_PSHUFW:
8504 icode = CODE_FOR_mmx_pshufw;
8505 arg0 = TREE_VALUE (arglist);
8506 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8507 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8508 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8509 tmode = insn_data[icode].operand[0].mode;
8510 mode0 = insn_data[icode].operand[2].mode;
8511 mode1 = insn_data[icode].operand[3].mode;
8513 if (! (*insn_data[icode].operand[2].predicate) (op0, mode0))
8514 op0 = copy_to_mode_reg (mode0, op0);
8515 if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
8516 {
8517 /* @@@ better error message */
8518 error ("mask must be an immediate");
8519 return const0_rtx;
8520 }
8521 if (target == 0
8522 || GET_MODE (target) != tmode
8523 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8524 target = gen_reg_rtx (tmode);
8525 pat = GEN_FCN (icode) (target, target, op0, op1);
8526 if (! pat)
8527 return 0;
8528 emit_insn (pat);
8529 return target;
8531 /* Composite intrinsics. */
8532 case IX86_BUILTIN_SETPS1:
8533 target = assign_386_stack_local (SFmode, 0);
8534 arg0 = TREE_VALUE (arglist);
8535 emit_move_insn (change_address (target, SFmode, XEXP (target, 0)),
8536 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8537 op0 = gen_reg_rtx (V4SFmode);
8538 emit_insn (gen_sse_loadss (op0, change_address (target, V4SFmode,
8539 XEXP (target, 0))));
8540 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
8541 return op0;
8543 case IX86_BUILTIN_SETPS:
8544 target = assign_386_stack_local (V4SFmode, 0);
8545 op0 = change_address (target, SFmode, XEXP (target, 0));
8546 arg0 = TREE_VALUE (arglist);
8547 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8548 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8549 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
8550 emit_move_insn (op0,
8551 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8552 emit_move_insn (adj_offsettable_operand (op0, 4),
8553 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
8554 emit_move_insn (adj_offsettable_operand (op0, 8),
8555 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
8556 emit_move_insn (adj_offsettable_operand (op0, 12),
8557 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
8558 op0 = gen_reg_rtx (V4SFmode);
8559 emit_insn (gen_sse_movaps (op0, target));
8560 return op0;
8562 case IX86_BUILTIN_CLRPS:
8563 target = gen_reg_rtx (TImode);
8564 emit_insn (gen_sse_clrti (target));
8565 return target;
8567 case IX86_BUILTIN_LOADRPS:
8568 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
8569 gen_reg_rtx (V4SFmode), 1);
8570 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
8571 return target;
8573 case IX86_BUILTIN_LOADPS1:
8574 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
8575 gen_reg_rtx (V4SFmode), 1);
8576 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
8577 return target;
8579 case IX86_BUILTIN_STOREPS1:
8580 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
8581 case IX86_BUILTIN_STORERPS:
8582 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
8584 case IX86_BUILTIN_MMX_ZERO:
8585 target = gen_reg_rtx (DImode);
8586 emit_insn (gen_mmx_clrdi (target));
8587 return target;
8589 default:
8590 break;
8591 }
8593 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
8594 if (d->code == fcode)
8595 {
8596 /* Compares are treated specially. */
8597 if (d->icode == CODE_FOR_maskcmpv4sf3
8598 || d->icode == CODE_FOR_vmmaskcmpv4sf3
8599 || d->icode == CODE_FOR_maskncmpv4sf3
8600 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
8601 return ix86_expand_sse_compare (d, arglist, target);
8603 return ix86_expand_binop_builtin (d->icode, arglist, target);
8604 }
8606 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
8607 if (d->code == fcode)
8608 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
8610 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
8611 if (d->code == fcode)
8612 return ix86_expand_sse_comi (d, arglist, target);
8614 /* @@@ Should really do something sensible here. */
8615 return 0;
8616 }
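/* Editor's note (non-build sketch): only builtins needing special operand
   handling get explicit cases in the switch above.  Everything else is
   table-driven: a call such as __builtin_ia32_mulps falls through the
   switch, matches its bdesc_2arg entry, and is expanded by
   ix86_expand_binop_builtin; one-operand builtins and the comi family are
   dispatched the same way through bdesc_1arg and bdesc_comi.  */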
8618 /* Store OPERAND to memory after reload is completed. This means
8619 that we can't easily use assign_stack_local. */
8620 rtx
8621 ix86_force_to_memory (mode, operand)
8622 enum machine_mode mode;
8623 rtx operand;
8624 {
8625 if (!reload_completed)
8626 abort ();
8627 switch (mode)
8628 {
8629 case DImode:
8630 {
8631 rtx operands[2];
8632 split_di (&operand, 1, operands, operands+1);
8633 emit_insn (
8634 gen_rtx_SET (VOIDmode,
8635 gen_rtx_MEM (SImode,
8636 gen_rtx_PRE_DEC (Pmode,
8637 stack_pointer_rtx)),
8638 operands[1]));
8639 emit_insn (
8640 gen_rtx_SET (VOIDmode,
8641 gen_rtx_MEM (SImode,
8642 gen_rtx_PRE_DEC (Pmode,
8643 stack_pointer_rtx)),
8644 operands[0]));
8645 }
8646 break;
8647 case HImode:
8648 /* It is better to store HImode values as SImode. */
8649 if (!TARGET_PARTIAL_REG_STALL)
8650 operand = gen_lowpart (SImode, operand);
8651 /* FALLTHRU */
8652 case SImode:
8653 emit_insn (
8654 gen_rtx_SET (VOIDmode,
8655 gen_rtx_MEM (GET_MODE (operand),
8656 gen_rtx_PRE_DEC (SImode,
8657 stack_pointer_rtx)),
8658 operand));
8659 break;
8660 default:
8661 abort ();
8662 }
8663 return gen_rtx_MEM (mode, stack_pointer_rtx);
8664 }
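/* Editor's note (non-build sketch): for SImode the routine above emits the
   equivalent of a push,

     (set (mem:SI (pre_dec:SI (reg:SI esp))) operand)

   and returns (mem:SI (reg:SI esp)), so the caller addresses the spilled
   value at the top of the stack; DImode is split into two such pushes.  */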
8666 /* Free the memory pushed by ix86_force_to_memory. */
8667 void
8668 ix86_free_from_memory (mode)
8669 enum machine_mode mode;
8670 {
8671 /* Use LEA to deallocate stack space. In the peephole2 pass it will be
8672 converted to a pop or add instruction if registers are available. */
8673 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8674 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8675 GEN_INT (mode == DImode
8676 ? 8
8677 : mode == HImode && TARGET_PARTIAL_REG_STALL
8678 ? 2
8679 : 4))));
8680 }
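/* Editor's note (non-build sketch): the matching deallocation for
   ix86_force_to_memory; after an SImode spill it emits

     (set (reg:SI esp) (plus:SI (reg:SI esp) (const_int 4)))

   printed as an lea, which peephole2 may later rewrite as a pop or an add
   when that is cheaper and a register is free.  */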