/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
   Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "insn-attr.h"
#include "basic-block.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  6,			/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  23,			/* cost of a divide/mod */
  15,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  12,			/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  40,			/* cost of a divide/mod */
  15,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs pentium_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  4,			/* variable shift costs */
  1,			/* constant shift costs */
  11,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  25,			/* cost of a divide/mod */
  8,			/* "large" insn */
  6,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs pentiumpro_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  4,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  17,			/* cost of a divide/mod */
  8,			/* "large" insn */
  2,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs k6_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  3,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  18,			/* cost of a divide/mod */
  8,			/* "large" insn */
  3,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs athlon_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  5,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  42,			/* cost of a divide/mod */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 20},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 16}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
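
/* Each x86_* flag below is a bitmask over the PROCESSOR_* values above:
   a feature or tuning is enabled whenever the bit for the active CPU is
   set, and a complemented mask such as ~m_PENT enables it on every
   processor except the ones named.  The target macros in i386.h test
   these masks against the scheduling CPU, conceptually like the
   following (the exact macro spelling is an assumption here):

     #define TARGET_USE_LEAVE (x86_use_leave & (1 << (int) ix86_cpu))  */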
const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
const int x86_use_any_reg = m_486;
const int x86_cmove = m_PPRO | m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
const int x86_use_sahf = m_PPRO | m_K6;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
const int x86_add_esp_4 = m_ATHLON | m_K6;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
const int x86_integer_DFmode_moves = ~m_ATHLON;
const int x86_partial_reg_dependency = m_ATHLON;
const int x86_memory_mismatch_stall = m_ATHLON;
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))

const char * const hi_reg_name[] = HI_REGISTER_NAMES;
const char * const qi_reg_name[] = QI_REGISTER_NAMES;
const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS
};
/* The "default" register map.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1,			/* arg, flags, fpsr, dir */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers:
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)  */

int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1,			/* arg, flags, fpsr, dir */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;
#define MAX_386_STACK_LOCALS 2

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
};

#define ix86_stack_locals (cfun->machine->stack_locals)
/* which cpu are we scheduling for */
enum processor_type ix86_cpu;

/* which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */

/* Register allocation order */
const char *ix86_reg_alloc_order;
static char regs_allocated[FIRST_PSEUDO_REGISTER];

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
int ix86_align_funcs;
const char *ix86_align_funcs_string;

/* Power of two alignment for loops.  */
int ix86_align_loops;

/* Power of two alignment for non-loop jumps.  */
int ix86_align_jumps;
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					  rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_free_machine_status PARAMS ((struct function *));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static HOST_WIDE_INT ix86_compute_frame_size PARAMS((HOST_WIDE_INT,
						     int *, int *, int *));
static int ix86_nsaved_regs PARAMS((void));
static void ix86_emit_save_regs PARAMS((void));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
					 rtx));
static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
					    rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					   rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      struct processor_costs *cost;	/* Processor costs */
      int target_enable;		/* Target flags to enable.  */
      int target_disable;		/* Target flags to disable.  */
      int align_loop;			/* Default alignments.  */
      int align_jump;
      int align_func;
      int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 2, 2, 2, 1},
      {&i486_cost, 0, 0, 4, 4, 4, 1},
      {&pentium_cost, 0, 0, -4, -4, -4, 1},
      {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
      {&k6_cost, 0, 0, -5, -5, 4, 1},
      {&athlon_cost, 0, 0, 4, -4, 4, 1}
    };

  static struct pta
    {
      const char *name;		/* processor name or nickname.  */
      enum processor_type processor;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386},
      {"i486", PROCESSOR_I486},
      {"i586", PROCESSOR_PENTIUM},
      {"pentium", PROCESSOR_PENTIUM},
      {"i686", PROCESSOR_PENTIUMPRO},
      {"pentiumpro", PROCESSOR_PENTIUMPRO},
      {"k6", PROCESSOR_K6},
      {"athlon", PROCESSOR_ATHLON},
    };

  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
  int i;
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  ix86_arch = PROCESSOR_I386;
  ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;

  if (ix86_arch_string != 0)
    {
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
	  {
	    ix86_arch = processor_alias_table[i].processor;
	    /* Default cpu tuning to the architecture.  */
	    ix86_cpu = ix86_arch;
	    break;
	  }

      if (i == pta_size)
	error ("bad value (%s) for -march= switch", ix86_arch_string);
    }

  if (ix86_cpu_string != 0)
    {
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
	  {
	    ix86_cpu = processor_alias_table[i].processor;
	    break;
	  }

      if (i == pta_size)
	error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
    }

  ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;
  free_machine_status = ix86_free_machine_status;
  /* Validate registers in register allocation order.  */
  if (ix86_reg_alloc_order)
    {
      int ch;

      for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a':	regno = 0;	break;
	    case 'd':	regno = 1;	break;
	    case 'c':	regno = 2;	break;
	    case 'b':	regno = 3;	break;
	    case 'S':	regno = 4;	break;
	    case 'D':	regno = 5;	break;
	    case 'B':	regno = 6;	break;

	    default:	fatal ("Register '%c' is unknown", ch);
	    }

	  if (regs_allocated[regno])
	    fatal ("Register '%c' already specified in allocation order", ch);

	  regs_allocated[regno] = 1;
	}
    }
  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      ix86_regparm = atoi (ix86_regparm_string);
      if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
	fatal ("-mregparm=%d is not between 0 and %d",
	       ix86_regparm, REGPARM_MAX);
    }

  /* Validate -malign-loops= value, or provide default.  */
  ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
  if (ix86_align_loops_string)
    {
      ix86_align_loops = atoi (ix86_align_loops_string);
      if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
	fatal ("-malign-loops=%d is not between 0 and %d",
	       ix86_align_loops, MAX_CODE_ALIGN);
    }

  /* Validate -malign-jumps= value, or provide default.  */
  ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
  if (ix86_align_jumps_string)
    {
      ix86_align_jumps = atoi (ix86_align_jumps_string);
      if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
	fatal ("-malign-jumps=%d is not between 0 and %d",
	       ix86_align_jumps, MAX_CODE_ALIGN);
    }

  /* Validate -malign-functions= value, or provide default.  */
  ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
  if (ix86_align_funcs_string)
    {
      ix86_align_funcs = atoi (ix86_align_funcs_string);
      if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
	fatal ("-malign-functions=%d is not between 0 and %d",
	       ix86_align_funcs, MAX_CODE_ALIGN);
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128.  */
  ix86_preferred_stack_boundary = 128;
  if (ix86_preferred_stack_boundary_string)
    {
      int i = atoi (ix86_preferred_stack_boundary_string);
      if (i < 2 || i > 31)
	fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
      ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      ix86_branch_cost = atoi (ix86_branch_cost_string);
      if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
	fatal ("-mbranch-cost=%d is not between 0 and 5",
	       ix86_branch_cost);
    }
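
  /* Worked example for the -mpreferred-stack-boundary computation above:
     the option value is a power of two, so -mpreferred-stack-boundary=4
     yields (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the
     16-byte alignment that SSE's __m128 wants.  */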
  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_fast_math)
    target_flags &= ~MASK_IEEE_FP;

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    target_flags |= MASK_MMX;
}
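
/* Illustrative effect of the two switches handled above: `-march=pentium'
   sets both ix86_arch and ix86_cpu to PROCESSOR_PENTIUM, so generated code
   may use Pentium instructions and is scheduled for it, while `-mcpu=pentium'
   alone only retunes costs and scheduling and leaves the instruction set at
   the baseline chosen by -march (PROCESSOR_I386 by default).  */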
/* A C statement (sans semicolon) to choose the order in which to
   allocate hard registers for pseudo-registers local to a basic
   block.

   Store the desired register order in the array `reg_alloc_order'.
   Element 0 should be the register to allocate first; element 1, the
   next register; and so on.

   The macro body should not assume anything about the contents of
   `reg_alloc_order' before execution of the macro.

   On most machines, it is not necessary to define this macro.  */

void
order_regs_for_local_alloc ()
{
  int i, ch, order;

  /* User specified the register allocation order.  */

  if (ix86_reg_alloc_order)
    {
      for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a':	regno = 0;	break;
	    case 'd':	regno = 1;	break;
	    case 'c':	regno = 2;	break;
	    case 'b':	regno = 3;	break;
	    case 'S':	regno = 4;	break;
	    case 'D':	regno = 5;	break;
	    case 'B':	regno = 6;	break;
	    }

	  reg_alloc_order[order++] = regno;
	}

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	{
	  if (! regs_allocated[i])
	    reg_alloc_order[order++] = i;
	}
    }

  /* If user did not specify a register allocation order, use natural order.  */
  else
    {
      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	reg_alloc_order[i] = i;
    }
}
void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
}
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for DECL.  The attributes in ATTRIBUTES have previously been
   assigned to DECL.  */

int
ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
     tree decl ATTRIBUTE_UNUSED;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier ATTRIBUTE_UNUSED;
     tree args ATTRIBUTE_UNUSED;
{
  return 0;
}
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for TYPE.  The attributes in ATTRIBUTES have previously been
   assigned to TYPE.  */

int
ix86_valid_type_attribute_p (type, attributes, identifier, args)
     tree type;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier;
     tree args;
{
  if (TREE_CODE (type) != FUNCTION_TYPE
      && TREE_CODE (type) != METHOD_TYPE
      && TREE_CODE (type) != FIELD_DECL
      && TREE_CODE (type) != TYPE_DECL)
    return 0;

  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  if (is_attribute_p ("stdcall", identifier))
    return (args == NULL_TREE);

  /* Cdecl attribute says the callee is a normal C declaration.  */
  if (is_attribute_p ("cdecl", identifier))
    return (args == NULL_TREE);

  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  if (is_attribute_p ("regparm", identifier))
    {
      tree cst;

      if (! args || TREE_CODE (args) != TREE_LIST
	  || TREE_CHAIN (args) != NULL_TREE
	  || TREE_VALUE (args) == NULL_TREE)
	return 0;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	return 0;

      if (compare_tree_int (cst, REGPARM_MAX) > 0)
	return 0;

      return 1;
    }

  return 0;
}
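
/* For reference, the attribute validated above is spelled like this in
   user code (REGPARM_MAX is 3 on this port, so 3 is the largest count
   that passes the check):

     int __attribute__ ((regparm (2))) f (int a, int b);  */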
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall functions will pop the stack if not variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    if (rtd
	&& (TYPE_ARG_TYPES (funtype) == NULL_TREE
	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		== void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument.  */
  if (aggregate_value_p (TREE_TYPE (funtype)))
    return GET_MODE_SIZE (Pmode);

  return 0;
}
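
/* Example of the logic above: under `-mrtd', or for a function declared
   `void __attribute__ ((stdcall)) f (int, int)', a fixed-argument callee
   pops its own 8 bytes of arguments with `ret $8'; a cdecl or varargs
   function returns with a plain `ret' and the caller pops.  */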
/* Argument support functions.  */

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (fntype)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_mode' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    cum->nregs = 0;
	}
    }

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* whether or not the argument was named */
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

  cum->words += words;
  cum->nregs -= words;
  cum->regno += words;

  if (cum->nregs <= 0)
    {
      cum->nregs = 0;
      cum->regno = 0;
    }

  return;
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

rtx
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  switch (mode)
    {
      /* For now, pass fp/complex values on the stack.  */
    default:
      break;

    case BLKmode:
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	ret = gen_rtx_REG (mode, cum->regno);
      break;
    }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
/* Return nonzero if OP is (const_int 1), else return zero.  */

int
const_int_1_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
}
/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && XINT (op, 1) >= 6
	      && XINT (op, 1) <= 7))
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != 7)
	return 0;

      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}
/* Return true if the operand contains a @GOT or @GOTOFF reference.  */

int
pic_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == CONST)
    {
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
    }
  return 0;
}
/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Half-pic doesn't allow anything but registers and constants.
     We've just taken care of the latter.  */
  if (HALF_PIC_P ())
    return register_operand (op, Pmode);

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}
int
constant_call_address_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);
  return GET_CODE (op) == SYMBOL_REF;
}
/* Match exactly zero and one.  */

int
const0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return op == CONST0_RTX (mode);
}

int
const1_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return op == const1_rtx;
}
/* Match 2, 4, or 8.  Used for leal multiplicands.  */

int
const248_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
}
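
/* 2, 4 and 8 are exactly the scale factors the i386 addressing mode can
   encode, e.g. the (illustrative) `leal (%ebx,%esi,4), %eax' computes
   %ebx + %esi*4 in a single instruction.  */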
/* True if this is a constant appropriate for an increment or decrement.  */

int
incdec_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (op == const1_rtx || op == constm1_rtx)
    return 1;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
    return 1;
  if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
    return 1;
  if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
    return 1;
  return 0;
}
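
/* The masks above are the unsigned images of -1: adding 0xff in QImode,
   0xffff in HImode or 0xffffffff in SImode wraps around to a subtraction
   of 1, so such constants can still be implemented with dec.  */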
/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is
   a register operand.

   This is used to prevent esp from being used as an index reg.
   Which would only happen in pathological cases.  */

int
reg_no_sp_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
    return 0;

  return register_operand (op, mode);
}
int
mmx_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return MMX_REG_P (op);
}
/* Return false if this is any eliminable register.  Otherwise
   general_operand.  */

int
general_no_elim_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return general_operand (op, mode);
}

/* Return false if this is any eliminable register.  Otherwise
   register_operand or const_int.  */

int
nonmemory_no_elim_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
}
/* Return true if op is a Q_REGS class register.  */

int
q_regs_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return QI_REG_P (op);
}

/* Return true if op is a NON_Q_REGS class register.  */

int
non_q_regs_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return NON_QI_REG_P (op);
}
/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
   insns.  */

int
sse_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  enum rtx_code code = GET_CODE (op);
  return code == EQ || code == LT || code == LE || code == UNORDERED;
}
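
/* EQ, LT, LE and UNORDERED are the orderings the cmpss/cmpps immediate
   can encode directly (together with their negations); other comparisons
   have to be obtained elsewhere, e.g. by swapping the operands.  */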
/* Return 1 if OP is a valid comparison operator in valid mode.  */
int
ix86_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));

  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      return (bypass_code == NIL
	      && second_code == NIL);
    }
  switch (code)
    {
    case EQ: case NE:
      return 1;
    case LT: case GE:
      if (inmode == CCmode || inmode == CCGCmode
	  || inmode == CCGOCmode || inmode == CCNOmode)
	return 1;
      return 0;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      if (inmode == CCmode)
	return 1;
      return 0;
    case GT: case LE:
      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}
/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));
  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	return 0;
      code = ix86_fp_compare_code_to_integer (code);
    }
  /* i387 supports just a limited set of condition codes.  */
  switch (code)
    {
    case LTU: case GTU: case LEU: case GEU:
      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
	return 1;
      return 0;
    case ORDERED: case UNORDERED:
    case EQ: case NE:
      return 1;
    default:
      return 0;
    }
}
/* Return 1 if OP is a binary operator that can be promoted to wider mode.  */

int
promotable_binary_operator (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case MULT:
      /* Modern CPUs have same latency for HImode and SImode multiply,
	 but 386 and 486 do HImode multiply faster.  */
      return ix86_cpu > PROCESSOR_I486;
    case PLUS:
    case AND:
    case IOR:
    case XOR:
    case ASHIFT:
      return 1;
    default:
      return 0;
    }
}
/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */

int
cmp_fp_expander_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (GET_CODE (op) == CONST_DOUBLE)
    return 1;
  return general_operand (op, mode);
}
/* Match an SI or HImode register for a zero_extract.  */

int
ext_register_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;
  return register_operand (op, VOIDmode);
}
/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}
int
mult_operator (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == MULT;
}

int
div_operator (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == DIV;
}
int
arith_or_logical_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
	      || GET_RTX_CLASS (GET_CODE (op)) == '2'));
}
/* Returns 1 if OP is memory operand with a displacement.  */

int
memory_displacement_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}
/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (general_operand (op, mode))
    return 1;

  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}
/* Returns 1 if OP is memory operand that can not be represented by the
   modRM array.  */

int
long_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}
/* Return nonzero if the rtx is known aligned.  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE)
    return -1;

#if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
  {
    REAL_VALUE_TYPE d;
    jmp_buf handler;
    int is0, is1;

    if (setjmp (handler))
      return 0;

    set_float_handler (handler);
    REAL_VALUE_FROM_CONST_DOUBLE (d, x);
    is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
    is1 = REAL_VALUES_EQUAL (d, dconst1);
    set_float_handler (NULL_PTR);

    if (is0)
      return 1;

    if (is1)
      return 2;

    /* Note that on the 80387, other constants, such as pi,
       are much slower to load as standard constants
       than to load from doubles in memory!  */
    /* ??? Not true on K6: all constants are equal cost.  */
  }
#endif

  return 0;
}
/* Returns 1 if OP contains a symbol reference */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  HOST_WIDE_INT tsize;
  int nregs;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif
#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32 pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  tsize = ix86_compute_frame_size (get_frame_size (), &nregs, NULL, NULL);
  return tsize == 0 && nregs == 0;
}
static char pic_label_name[32];

/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

void
ix86_asm_file_end (file)
     FILE *file;
{
  rtx xops[2];

  if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
    return;

  /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
     to updating relocations to a section being discarded such that this
     doesn't work.  Ought to detect this at configure time.  */
#if 0 && defined (ASM_OUTPUT_SECTION_NAME)
  /* The trick here is to create a linkonce section containing the
     pic label thunk, but to refer to it with an internal label.
     Because the label is internal, we don't have inter-dso name
     binding issues on hosts that don't support ".hidden".

     In order to use these macros, however, we must create a fake
     function decl.  */
  {
    tree decl = build_decl (FUNCTION_DECL,
			    get_identifier ("i686.get_pc_thunk"),
			    error_mark_node);
    DECL_ONE_ONLY (decl) = 1;
    UNIQUE_SECTION (decl, 0);
    named_section (decl, NULL, 0);
  }
#else
  text_section ();
#endif

  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
     internal (non-global) label that's being emitted, it didn't make
     sense to have .type information for local labels.  This caused
     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
     me debug info for a label that you're declaring non-global?) this
     was changed to call ASM_OUTPUT_LABEL() instead.  */

  ASM_OUTPUT_LABEL (file, pic_label_name);

  xops[0] = pic_offset_table_rtx;
  xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
  output_asm_insn ("ret", xops);
}
void
load_pic_register ()
{
  rtx gotsym, pclab;

  gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");

  if (TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (! pic_label_name[0])
	ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
    }
  else
    {
      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
    }

  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));

  if (! TARGET_DEEP_BRANCH_PREDICTION)
    emit_insn (gen_popsi1 (pic_offset_table_rtx));

  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
}
/* Generate an SImode "push" pattern for input ARG.  */

static rtx
gen_push (arg)
     rtx arg;
{
  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (SImode,
				   gen_rtx_PRE_DEC (SImode,
						    stack_pointer_rtx)),
		      arg);
}
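
/* The RTL produced above is the canonical i386 push:
     (set (mem:SI (pre_dec:SI (reg:SI esp))) arg)
   which the move patterns in i386.md render as `pushl'.  */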
/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs ()
{
  int nregs = 0;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int limit = (frame_pointer_needed
	       ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
  int regno;

  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && ! call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      nregs++;
  return nregs;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;
     int to;
{
  int padding1;
  int nregs;

  /* Stack grows downward:

     [arguments]
					<- ARG_POINTER
     saved pc

     saved frame pointer if frame_pointer_needed
					<- HARD_FRAME_POINTER
     [saved regs]

     [padding1]

     [frame]
					<- STACK_POINTER  */

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    /* Skip saved PC and previous frame pointer.
       Executed only when frame_pointer_needed.  */
    return 8;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    {
      ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *) 0);
      padding1 += nregs * UNITS_PER_WORD;
      return -padding1;
    }
  else
    {
      /* ARG_POINTER or FRAME_POINTER to STACK_POINTER elimination.  */
      int frame_size = frame_pointer_needed ? 8 : 4;
      HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (),
						     &nregs, &padding1, (int *) 0);

      if (to != STACK_POINTER_REGNUM)
	abort ();
      else if (from == ARG_POINTER_REGNUM)
	return tsize + nregs * UNITS_PER_WORD + frame_size;
      else if (from != FRAME_POINTER_REGNUM)
	abort ();
      else
	return tsize - padding1;
    }
}
/* Compute the size of local storage taking into consideration the
   desired stack alignment which is to be maintained.  Also determine
   the number of registers saved below the local storage.

   PADDING1 returns padding before stack frame and PADDING2 returns
   padding after stack frame;  */

static HOST_WIDE_INT
ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2)
     HOST_WIDE_INT size;
     int *nregs_on_stack;
     int *rpadding1;
     int *rpadding2;
{
  int nregs;
  int padding1;
  int padding2;
  HOST_WIDE_INT total_size;
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  int offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;

  nregs = ix86_nsaved_regs ();
  total_size = size;

  offset = frame_pointer_needed ? 8 : 4;

  /* Do some sanity checking of stack_alignment_needed and preferred_alignment,
     since the i386 port is the only one using these features, and they may
     break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (!size && stack_alignment_needed != STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < 4)
    stack_alignment_needed = 4;

  offset += nregs * UNITS_PER_WORD;

  if (ACCUMULATE_OUTGOING_ARGS)
    total_size += current_function_outgoing_args_size;

  total_size += offset;

  /* Align start of frame for local function.  */
  padding1 = ((offset + stack_alignment_needed - 1)
	      & -stack_alignment_needed) - offset;
  total_size += padding1;

  /* Align stack boundary.  */
  padding2 = ((total_size + preferred_alignment - 1)
	      & -preferred_alignment) - total_size;

  if (ACCUMULATE_OUTGOING_ARGS)
    padding2 += current_function_outgoing_args_size;

  if (nregs_on_stack)
    *nregs_on_stack = nregs;
  if (rpadding1)
    *rpadding1 = padding1;
  if (rpadding2)
    *rpadding2 = padding2;

  return size + padding1 + padding2;
}
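
/* Worked example of the rounding above (illustrative numbers): with a
   frame pointer, one saved register and stack_alignment_needed of 16,
   offset = 8 + 4 = 12 and padding1 = ((12 + 15) & -16) - 12 = 4, so the
   local frame starts on a 16 byte boundary.  */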
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs ()
{
  int regno;
  int limit;
  rtx insn;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  limit = (frame_pointer_needed
	   ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);

  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && !call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue ()
{
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *) 0,
						 (int *) 0, (int *) 0);
  rtx insn;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  ix86_emit_save_regs ();

  if (tsize == 0)
    ;
  else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
    {
      if (frame_pointer_needed)
	insn = emit_insn (gen_pro_epilogue_adjust_stack
			  (stack_pointer_rtx, stack_pointer_rtx,
			   GEN_INT (-tsize), hard_frame_pointer_rtx));
      else
	insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				      GEN_INT (-tsize)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (tsize));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx));

      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    load_pic_register ();

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  However, if -fpic, the above call will have
     done that.  */
  if ((profile_flag || profile_block_flag) && ! pic_reg_used)
    emit_insn (gen_blockage ());
}
/* Emit code to add TSIZE to esp value.  Use POP instruction when
   profitable.  */

static void
ix86_emit_epilogue_esp_adjustment (tsize)
     int tsize;
{
  /* If a frame pointer is present, we must be sure to tie the sp
     to the fp so that we don't mis-schedule.  */
  if (frame_pointer_needed)
    emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
					      stack_pointer_rtx,
					      GEN_INT (tsize),
					      hard_frame_pointer_rtx));
  else
    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			   GEN_INT (tsize)));
}
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */

static void
ix86_emit_restore_regs_using_mov (pointer, offset)
     rtx pointer;
     int offset;
{
  int regno;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int limit = (frame_pointer_needed
	       ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);

  for (regno = 0; regno < limit; regno++)
    if ((regs_ever_live[regno] && !call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      {
	emit_move_insn (gen_rtx_REG (SImode, regno),
			adj_offsettable_operand (gen_rtx_MEM (SImode,
							      pointer),
						 offset));
	offset += 4;
      }
}
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (emit_return)
     int emit_return;
{
  int nregs;
  int regno;

  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
                                  || current_function_uses_const_pool);
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
                                                 (int *) 0, (int *) 0);

  /* Calculate start of saved registers relative to ebp.  */
  offset = -nregs * UNITS_PER_WORD;

#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    {
      FUNCTION_BLOCK_PROFILER_EXIT;
    }
#endif

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register is less work
     than reloading sp and popping the register.

     The default code results in stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well.  Especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in future.  */
  if ((!sp_valid && nregs <= 1)
      || (frame_pointer_needed && !nregs && tsize)
      || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
          && nregs == 1))
    {
      /* Restore registers.  We can use ebp or esp to address the memory
         locations.  If both are available, default to ebp, since offsets
         are known to be small.  The only exception is esp pointing
         directly to the end of the block of saved registers, where we
         may simplify the addressing mode.  */

      if (!frame_pointer_needed || (sp_valid && !tsize))
        ix86_emit_restore_regs_using_mov (stack_pointer_rtx, tsize);
      else
        ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);

      if (!frame_pointer_needed)
        ix86_emit_epilogue_esp_adjustment (tsize + nregs * UNITS_PER_WORD);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size)
        emit_insn (gen_leave ());
      else
        {
          emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
                                                    hard_frame_pointer_rtx,
                                                    const0_rtx,
                                                    hard_frame_pointer_rtx));
          emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
        }
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
         pop the registers.  */
      if (!sp_valid)
        {
          if (!frame_pointer_needed)
            abort ();
          emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
                                                    hard_frame_pointer_rtx,
                                                    GEN_INT (offset),
                                                    hard_frame_pointer_rtx));
        }
      else if (tsize)
        ix86_emit_epilogue_esp_adjustment (tsize);

      for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
        if ((regs_ever_live[regno] && !call_used_regs[regno])
            || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
          emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (! emit_return)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
         return address, do an explicit add, and jump indirectly to the
         caller.  */

      if (current_function_pops_args >= 65536)
        {
          rtx ecx = gen_rtx_REG (SImode, 2);

          emit_insn (gen_popsi1 (ecx));
          emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
          emit_jump_insn (gen_return_indirect_internal (ecx));
        }
      else
        emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
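/* Illustrative sketch (not from the original sources): the two epilogue
   shapes chosen above for a frame function are

        leave                     versus     mov  %ebp, %esp
        ret                                  pop  %ebp
                                             ret

   `leave' is smaller, while the discrete mov/pop pair is faster on
   processors where TARGET_USE_LEAVE is clear.  */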
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return false if the structure of the address is
   grossly off.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
        {
          if (code1 == REG || code1 == SUBREG)
            index = op0, base = op1;    /* index + base */
          else
            base = op0, disp = op1;     /* base + displacement */
        }
      else if (code0 == MULT)
        {
          index = XEXP (op0, 0);
          scale_rtx = XEXP (op0, 1);
          if (code1 == REG || code1 == SUBREG)
            base = op1;                 /* index*scale + base */
          else
            disp = op1;                 /* index*scale + disp */
        }
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
        {
          index = XEXP (XEXP (op0, 0), 0);      /* index*scale + base + disp */
          scale_rtx = XEXP (XEXP (op0, 0), 1);
          base = XEXP (op0, 1);
          disp = op1;
        }
      else if (code0 == PLUS)
        {
          index = XEXP (op0, 0);        /* index + base + disp */
          base = XEXP (op0, 1);
          disp = op1;
        }
      else
        return FALSE;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);           /* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
        return FALSE;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
        return FALSE;
      scale = 1 << scale;
    }
  else
    disp = addr;                        /* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
        return FALSE;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
          || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return TRUE;
}
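/* Illustrative examples (not from the original sources) of how the
   decomposition above fills in the parts:

     (reg %eax)                                  base=%eax
     (plus (reg %eax) (const_int 8))             base=%eax  disp=8
     (plus (mult (reg %ebx) (const_int 4))
           (reg %ecx))                           index=%ebx scale=4 base=%ecx
     (ashift (reg %edx) (const_int 2))           index=%edx scale=4  */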
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (x)
     rtx x;
{
  struct ix86_address parts;
  int cost = 1;

  if (!ix86_decompose_address (x, &parts))
    abort ();

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
          && (!REG_P (parts.index)
              || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
          || (parts.disp && !parts.base && parts.index && parts.scale != 1)
          || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
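/* Worked example (illustrative, not from the original sources), assuming
   the cost starts at 1 as reconstructed above: [%ebp-4] has a nonzero
   displacement, so cost drops to 0; using a pseudo as base raises it
   back to 1; on the K6, [%ebx+%ecx] (base+index, scale 1, no disp) pays
   the extra 10.  */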
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (x)
     rtx x;
{
  rtx term;

  if (GET_CODE (x) != PLUS
      || XEXP (x, 0) != pic_offset_table_rtx
      || GET_CODE (XEXP (x, 1)) != CONST)
    return x;

  term = XEXP (XEXP (x, 1), 0);

  if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
    term = XEXP (term, 0);

  if (GET_CODE (term) != UNSPEC
      || XVECLEN (term, 0) != 1
      || XINT (term, 1) != 7)
    return x;

  term = XVECEXP (term, 0, 0);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (disp)
     register rtx disp;
{
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
        return 0;
      disp = XEXP (disp, 0);
    }

  if (GET_CODE (disp) != UNSPEC
      || XVECLEN (disp, 0) != 1)
    return 0;

  /* Must be @GOT or @GOTOFF.  */
  if (XINT (disp, 1) != 6
      && XINT (disp, 1) != 7)
    return 0;

  if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
      && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
    return 0;

  return 1;
}
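/* For reference (illustrative, not from the original sources), the
   shapes accepted above are

     (const (unspec [(symbol_ref "x")] 7))                    x@GOTOFF
     (const (plus (unspec [(symbol_ref "x")] 7)
                  (const_int 4)))                             x@GOTOFF+4

   with unspec number 6 standing for @GOT references.  */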
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
               "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
               GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (! ix86_decompose_address (addr, &parts))
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
        {
          reason = "base is not a register";
          goto report_error;
        }

      if (GET_MODE (base) != Pmode)
        {
          reason = "base is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
          || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
        {
          reason = "base is not valid";
          goto report_error;
        }
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
        {
          reason = "index is not a register";
          goto report_error;
        }

      if (GET_MODE (index) != Pmode)
        {
          reason = "index is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
          || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
        {
          reason = "index is not valid";
          goto report_error;
        }
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
        {
          reason = "scale without index";
          goto report_error;
        }

      if (scale != 2 && scale != 4 && scale != 8)
        {
          reason = "scale is not a valid multiplier";
          goto report_error;
        }
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (!CONSTANT_ADDRESS_P (disp))
        {
          reason = "displacement is not constant";
          goto report_error;
        }

      if (GET_CODE (disp) == CONST_DOUBLE)
        {
          reason = "displacement is a const_double";
          goto report_error;
        }

      if (flag_pic && SYMBOLIC_CONST (disp))
        {
          if (! legitimate_pic_address_disp_p (disp))
            {
              reason = "displacement is an invalid pic construct";
              goto report_error;
            }

          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is good idea, unfortunately these constructs may
             be created by the "adds using lea" optimization for incorrect
             code.  The resulting code is nonsensical, but results in
             addressing the GOT table with a pic_offset_table_rtx base.
             We can't just refuse it easily, since it gets matched by the
             "addsi3" pattern, which later gets split to lea in the case
             the output register differs from the input.  While this can
             be handled by a separate addsi pattern for this case that
             never results in lea, disabling this test seems to be the
             easier and correct fix for the crash.  */
        }
      else if (HALF_PIC_P ())
        {
          if (! HALF_PIC_ADDRESS_P (disp)
              || (base != NULL_RTX || index != NULL_RTX))
            {
              reason = "displacement is an invalid half-pic reference";
              goto report_error;
            }
        }
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
/* Return a unique alias set for the GOT.  */

static HOST_WIDE_INT
ix86_GOT_alias_set ()
{
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      used as the address thereafter.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

  if (GET_CODE (addr) == LABEL_REF
      || (GET_CODE (addr) == SYMBOL_REF
          && (CONSTANT_POOL_ADDRESS_P (addr)
              || SYMBOL_REF_FLAG (addr))))
    {
      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
        {
          emit_move_insn (reg, new);
          new = reg;
        }
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      /* This symbol must be referenced via a load from the
         Global Offset Table (@GOT).  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
      new = gen_rtx_MEM (Pmode, new);
      RTX_UNCHANGING_P (new) = 1;
      MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();

      if (reg == 0)
        reg = gen_reg_rtx (Pmode);
      emit_move_insn (reg, new);
      new = reg;
    }
  else
    {
      if (GET_CODE (addr) == CONST)
        {
          addr = XEXP (addr, 0);
          if (GET_CODE (addr) == UNSPEC)
            {
              /* Check that the unspec is one of the ones we generate?  */
            }
          else if (GET_CODE (addr) != PLUS)
            abort ();
        }
      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

          /* Check first to see if this is a constant offset from a @GOTOFF
             symbol reference.  */
          if ((GET_CODE (op0) == LABEL_REF
               || (GET_CODE (op0) == SYMBOL_REF
                   && (CONSTANT_POOL_ADDRESS_P (op0)
                       || SYMBOL_REF_FLAG (op0))))
              && GET_CODE (op1) == CONST_INT)
            {
              current_function_uses_pic_offset_table = 1;
              new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
              new = gen_rtx_PLUS (Pmode, new, op1);
              new = gen_rtx_CONST (Pmode, new);
              new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

              if (reg != 0)
                {
                  emit_move_insn (reg, new);
                  new = reg;
                }
            }
          else
            {
              base = legitimize_pic_address (XEXP (addr, 0), reg);
              new  = legitimize_pic_address (XEXP (addr, 1),
                                             base == reg ? NULL_RTX : reg);

              if (GET_CODE (new) == CONST_INT)
                new = plus_constant (base, INTVAL (new));
              else
                {
                  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
                    {
                      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
                      new = XEXP (new, 1);
                    }
                  new = gen_rtx_PLUS (Pmode, base, new);
                }
            }
        }
    }
  return new;
}
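/* Illustrative result (not from the original sources): for a global
   symbol the code above produces the memory load

     (mem (plus pic_reg (const (unspec [(symbol_ref "x")] 6))))   x@GOT

   copied into REG, while a static symbol or label becomes the direct sum

     (plus pic_reg (const (unspec [(symbol_ref "x")] 7)))         x@GOTOFF  */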
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (x, oldx, mode)
     register rtx x;
     register rtx oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
               GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
                        GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
          && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
          && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
        {
          changed = 1;
          XEXP (x, 0) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
                                      GEN_INT (1 << log));
        }

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
          && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
          && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
        {
          changed = 1;
          XEXP (x, 1) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
                                      GEN_INT (1 << log));
        }

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          rtx tmp = XEXP (x, 0);
          XEXP (x, 0) = XEXP (x, 1);
          XEXP (x, 1) = tmp;
          changed = 1;
        }

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
         created by virtual register instantiation, register elimination, and
         similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
        {
          changed = 1;
          x = gen_rtx_PLUS (Pmode,
                            gen_rtx_PLUS (Pmode, XEXP (x, 0),
                                          XEXP (XEXP (x, 1), 0)),
                            XEXP (XEXP (x, 1), 1));
        }

      /* Canonicalize
         (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
               && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
               && CONSTANT_P (XEXP (x, 1)))
        {
          rtx constant;
          rtx other = NULL_RTX;

          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              constant = XEXP (x, 1);
              other = XEXP (XEXP (XEXP (x, 0), 1), 1);
            }
          else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
            {
              constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
              other = XEXP (x, 1);
            }
          else
            constant = 0;

          if (constant)
            {
              changed = 1;
              x = gen_rtx_PLUS (Pmode,
                                gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
                                              XEXP (XEXP (XEXP (x, 0), 1), 0)),
                                plus_constant (other, INTVAL (constant)));
            }
        }

      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
        {
          changed = 1;
          XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
        }

      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          changed = 1;
          XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
        }

      if (changed
          && GET_CODE (XEXP (x, 1)) == REG
          && GET_CODE (XEXP (x, 0)) == REG)
        return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
        {
          changed = 1;
          x = legitimize_pic_address (x, 0);
        }

      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      if (GET_CODE (XEXP (x, 0)) == REG)
        {
          register rtx temp = gen_reg_rtx (Pmode);
          register rtx val = force_operand (XEXP (x, 1), temp);
          if (val != temp)
            emit_move_insn (temp, val);

          XEXP (x, 1) = temp;
          return x;
        }

      else if (GET_CODE (XEXP (x, 1)) == REG)
        {
          register rtx temp = gen_reg_rtx (Pmode);
          register rtx val = force_operand (XEXP (x, 0), temp);
          if (val != temp)
            emit_move_insn (temp, val);

          XEXP (x, 0) = temp;
          return x;
        }
    }

  return x;
}
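/* Example of the canonicalization above (illustrative, not from the
   original sources): the address (plus (ashift (reg) (const_int 2))
   (reg)) is first rewritten to (plus (mult (reg) (const_int 4)) (reg)),
   which ix86_decompose_address then accepts as index*4 + base.  */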
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      if (flag_pic)
        putc ('.', file);
      else
        abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      if (code == 'P' && ! SYMBOL_REF_FLAG (x))
        fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
         but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
        {
          /* We can use %d if the number is <32 bits and positive.  */
          if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
            fprintf (file, "0x%lx%08lx",
                     (unsigned long) CONST_DOUBLE_HIGH (x),
                     (unsigned long) CONST_DOUBLE_LOW (x));
          else
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
        }
      else
        /* We can't handle floating point constants;
           PRINT_OPERAND must handle them.  */
        output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
        {
          output_pic_addr_const (file, XEXP (x, 0), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 1), code);
        }
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          output_pic_addr_const (file, XEXP (x, 1), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 0), code);
        }
      else
        abort ();
      break;

    case MINUS:
      putc (ASSEMBLER_DIALECT ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
        abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
        {
        case 6:
          fputs ("@GOT", file);
          break;
        case 7:
          fputs ("@GOTOFF", file);
          break;
        case 8:
          fputs ("@PLT", file);
          break;
        default:
          output_operand_lossage ("invalid UNSPEC as operand");
          break;
        }
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
   We need to handle our special PIC relocations.  */

void
i386_dwarf_output_addr_const (file, x)
     FILE *file;
     rtx x;
{
  fprintf (file, "%s", INT_ASM_OP);
  if (flag_pic)
    output_pic_addr_const (file, x, '\0');
  else
    output_addr_const (file, x);
  fputc ('\n', file);
}
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.  */

rtx
i386_simplify_dwarf_addr (orig_x)
     rtx orig_x;
{
  rtx x = orig_x;

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 0)) != REG
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == UNSPEC
      && (XINT (x, 1) == 6
          || XINT (x, 1) == 7))
    return XVECEXP (x, 0, 0);

  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (XINT (XEXP (x, 0), 1) == 6
          || XINT (XEXP (x, 0), 1) == 7))
    return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));

  return orig_x;
}
static void
put_condition_code (code, mode, reverse, fp, file)
     enum rtx_code code;
     enum machine_mode mode;
     int reverse, fp;
     FILE *file;
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
        abort ();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
        abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
         Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode != CCmode)
        abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
        suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
        suffix = "l";
      else
        abort ();
      break;
    case LTU:
      if (mode != CCmode)
        abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
        suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
        suffix = "ge";
      else
        abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
        abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
        abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
        abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
void
print_reg (x, code, file)
     rtx x;
     int code;
     FILE *file;
{
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  if (code == 'w')
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else if (code == 'm' || MMX_REG_P (x))
    code = 5;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  switch (code)
    {
    case 5:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 3:
      if (STACK_TOP_P (x))
        {
          fputs ("st(0)", file);
          break;
        }
      /* FALLTHRU */
    case 4:
    case 8:
    case 12:
      if (! FP_REG_P (x))
        putc ('e', file);
      /* FALLTHRU */
    case 2:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
        delimiter.
   b -- print the QImode name of the register for the indicated operand.
        %b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   m -- print "st(n)" as an mmx register.  */
void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  if (code)
    {
      switch (code)
        {
        case '*':
          if (ASSEMBLER_DIALECT == 0)
            putc ('*', file);
          return;

        case 'A':
          if (ASSEMBLER_DIALECT == 0)
            putc ('*', file);
          else if (ASSEMBLER_DIALECT == 1)
            {
              /* Intel syntax.  For absolute addresses, registers should not
                 be surrounded by braces.  */
              if (GET_CODE (x) != REG)
                {
                  putc ('[', file);
                  PRINT_OPERAND (file, x, 0);
                  putc (']', file);
                  return;
                }
            }

          PRINT_OPERAND (file, x, 0);
          return;

        case 'L':
          if (ASSEMBLER_DIALECT == 0)
            putc ('l', file);
          return;

        case 'W':
          if (ASSEMBLER_DIALECT == 0)
            putc ('w', file);
          return;

        case 'B':
          if (ASSEMBLER_DIALECT == 0)
            putc ('b', file);
          return;

        case 'Q':
          if (ASSEMBLER_DIALECT == 0)
            putc ('l', file);
          return;

        case 'S':
          if (ASSEMBLER_DIALECT == 0)
            putc ('s', file);
          return;

        case 'T':
          if (ASSEMBLER_DIALECT == 0)
            putc ('t', file);
          return;

        case 'z':
          /* 387 opcodes don't get size suffixes if the operands are
             registers.  */

          if (STACK_REG_P (x))
            return;

          /* this is the size of op from size of operand */
          switch (GET_MODE_SIZE (GET_MODE (x)))
            {
            case 2:
#ifdef HAVE_GAS_FILDS_FISTS
              putc ('s', file);
#endif
              return;

            case 4:
              if (GET_MODE (x) == SFmode)
                {
                  putc ('s', file);
                  return;
                }
              else
                putc ('l', file);
              return;

            case 12:
            case 16:
              putc ('t', file);
              return;

            case 8:
              if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
                {
#ifdef GAS_MNEMONICS
                  putc ('q', file);
#else
                  putc ('l', file);
                  putc ('l', file);
#endif
                }
              else
                putc ('l', file);
              return;

            default:
              abort ();
            }

        case 'b':
        case 'w':
        case 'k':
        case 'h':
        case 'y':
        case 'm':
        case 'X':
        case 'P':
          break;

        case 's':
          if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
            {
              PRINT_OPERAND (file, x, 0);
              putc (',', file);
            }
          return;

        case 'C':
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
          return;

        case 'F':
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
          return;

          /* Like above, but reverse condition */
        case 'c':
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
          return;

        case 'f':
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
          return;

        default:
          {
            char str[50];

            sprintf (str, "invalid operand code `%c'", code);
            output_operand_lossage (str);
          }
        }
    }

  if (GET_CODE (x) == REG)
    {
      PRINT_REG (x, code, file);
    }

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
        {
          const char *size;

          switch (GET_MODE_SIZE (GET_MODE (x)))
            {
            case 1: size = "BYTE"; break;
            case 2: size = "WORD"; break;
            case 4: size = "DWORD"; break;
            case 8: size = "QWORD"; break;
            case 12: size = "XWORD"; break;
            case 16: size = "XMMWORD"; break;
            default:
              abort ();
            }

          /* Check for explicit size override (codes 'b', 'w' and 'k')  */
          if (code == 'b')
            size = "BYTE";
          else if (code == 'w')
            size = "WORD";
          else if (code == 'k')
            size = "DWORD";

          fputs (size, file);
          fputs (" PTR ", file);
        }

      x = XEXP (x, 0);
      if (flag_pic && CONSTANT_ADDRESS_P (x))
        output_pic_addr_const (file, x, code);
      else
        output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == 0)
        putc ('$', file);
      fprintf (file, "0x%lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
           && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }
  else
    {
      if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
        {
          if (ASSEMBLER_DIALECT == 0)
            putc ('$', file);
        }
      else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
               || GET_CODE (x) == LABEL_REF)
        {
          if (ASSEMBLER_DIALECT == 0)
            putc ('$', file);
          else
            fputs ("OFFSET FLAT:", file);
        }

      if (GET_CODE (x) == CONST_INT)
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
        output_pic_addr_const (file, x, code);
      else
        output_addr_const (file, x);
    }
}
/* Print a memory operand whose address is ADDR.  */

void
print_operand_address (file, addr)
     FILE *file;
     register rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
        {
          if (ASSEMBLER_DIALECT != 0)
            {
              if (USER_LABEL_PREFIX[0] == 0)
                putc ('%', file);
              fputs ("ds:", file);
            }
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
        }
      else if (flag_pic)
        output_pic_addr_const (file, addr, 0);
      else
        output_addr_const (file, addr);
    }
  else
    {
      if (ASSEMBLER_DIALECT == 0)
        {
          if (disp)
            {
              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (GET_CODE (disp) == LABEL_REF)
                output_asm_label (disp);
              else
                output_addr_const (file, disp);
            }

          putc ('(', file);
          if (base)
            PRINT_REG (base, 0, file);
          if (index)
            {
              putc (',', file);
              PRINT_REG (index, 0, file);
              if (scale != 1)
                fprintf (file, ",%d", scale);
            }
          putc (')', file);
        }
      else
        {
          rtx offset = NULL_RTX;

          if (disp)
            {
              /* Pull out the offset of a symbol; print any symbol itself.  */
              if (GET_CODE (disp) == CONST
                  && GET_CODE (XEXP (disp, 0)) == PLUS
                  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
                {
                  offset = XEXP (XEXP (disp, 0), 1);
                  disp = gen_rtx_CONST (VOIDmode,
                                        XEXP (XEXP (disp, 0), 0));
                }

              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (GET_CODE (disp) == LABEL_REF)
                output_asm_label (disp);
              else if (GET_CODE (disp) == CONST_INT)
                offset = disp;
              else
                output_addr_const (file, disp);
            }

          putc ('[', file);
          if (base)
            {
              PRINT_REG (base, 0, file);
              if (offset)
                {
                  if (INTVAL (offset) >= 0)
                    putc ('+', file);
                  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
                }
            }
          else if (offset)
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));

          if (index)
            {
              putc ('+', file);
              PRINT_REG (index, 0, file);
              if (scale != 1)
                fprintf (file, "*%d", scale);
            }
          putc (']', file);
        }
    }
}
/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_di (operands, num, lo_half, hi_half)
     rtx operands[];
     int num;
     rtx lo_half[], hi_half[];
{
  while (num--)
    {
      rtx op = operands[num];
      if (CONSTANT_P (op))
        split_double (op, &lo_half[num], &hi_half[num]);
      else if (! reload_completed)
        {
          lo_half[num] = gen_lowpart (SImode, op);
          hi_half[num] = gen_highpart (SImode, op);
        }
      else if (GET_CODE (op) == REG)
        {
          lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
          hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
        }
      else if (offsettable_memref_p (op))
        {
          rtx lo_addr = XEXP (op, 0);
          rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
          lo_half[num] = change_address (op, SImode, lo_addr);
          hi_half[num] = change_address (op, SImode, hi_addr);
        }
      else
        abort ();
    }
}
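/* Example (illustrative, not from the original sources): after reload,
   a DImode value in (reg:DI 0) splits into lo (reg:SI 0) and hi
   (reg:SI 1); an offsettable memory operand at address A splits into
   SImode references at A and A+4.  */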
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif
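/* Illustrative note (not from the original sources): with SYSV386_COMPAT
   set, gcc spells the fsub/fsubr (and fdiv/fdivr) mnemonics the way AT&T
   derived assemblers expect them when the destination is not st(0),
   which is the opposite of what the hardware opcode names suggest; see
   the SYSV386_COMPAT conditionals in output_387_binary_op below.  */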
const char *
output_387_binary_op (insn, operands)
     rtx insn;
     rtx *operands;
{
  static char buf[30];
  const char *p;

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
           && REGNO (operands[0]) == REGNO (operands[1])
           && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
          || (REG_P (operands[2])
              && REGNO (operands[0]) == REGNO (operands[2])
              && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else
    abort ();
#endif

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fiadd";
      else
        p = "fadd";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fisub";
      else
        p = "fsub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fimul";
      else
        p = "fmul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fidiv";
      else
        p = "fdiv";
      break;

    default:
      abort ();
    }

  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
        {
          rtx temp = operands[2];
          operands[2] = operands[1];
          operands[1] = temp;
        }

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
        {
          p = "%z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
          if (STACK_TOP_P (operands[0]))
            /* How is it that we are storing to a dead operand[2]?
               Well, presumably operands[1] is dead too.  We can't
               store the result to st(0) as st(0) gets popped on this
               instruction.  Instead store to operands[2] (which I
               think has to be st(1)).  st(1) will be popped later.
               gcc <= 2.8.1 didn't have this check and generated
               assembly code that the Unixware assembler rejected.  */
            p = "p\t{%0, %2|%2, %0}";   /* st(1) = st(0) op st(1); pop */
          else
            p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
          break;
        }

      if (STACK_TOP_P (operands[0]))
        p = "\t{%y2, %0|%0, %y2}";      /* st(0) = st(0) op st(r2) */
      else
        p = "\t{%2, %0|%0, %2}";        /* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (GET_CODE (operands[1]) == MEM)
        {
          p = "r%z1\t%1";
          break;
        }

      if (GET_CODE (operands[2]) == MEM)
        {
          p = "%z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
#if SYSV386_COMPAT
          /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
             derived assemblers, confusingly reverse the direction of
             the operation for fsub{r} and fdiv{r} when the
             destination register is not st(0).  The Intel assembler
             doesn't have this brain damage.  Read !SYSV386_COMPAT to
             figure out what the hardware really does.  */
          if (STACK_TOP_P (operands[0]))
            p = "{p\t%0, %2|rp\t%2, %0}";
          else
            p = "{rp\t%2, %0|p\t%0, %2}";
#else
          if (STACK_TOP_P (operands[0]))
            /* As above for fmul/fadd, we can't store to st(0).  */
            p = "rp\t{%0, %2|%2, %0}";  /* st(1) = st(0) op st(1); pop */
          else
            p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
#endif
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        {
#if SYSV386_COMPAT
          if (STACK_TOP_P (operands[0]))
            p = "{rp\t%0, %1|p\t%1, %0}";
          else
            p = "{p\t%1, %0|rp\t%0, %1}";
#else
          if (STACK_TOP_P (operands[0]))
            p = "p\t{%0, %1|%1, %0}";   /* st(1) = st(1) op st(0); pop */
          else
            p = "rp\t{%1, %0|%0, %1}";  /* st(r2) = st(0) op st(r2); pop */
#endif
          break;
        }

      if (STACK_TOP_P (operands[0]))
        {
          if (STACK_TOP_P (operands[1]))
            p = "\t{%y2, %0|%0, %y2}";  /* st(0) = st(0) op st(r2) */
          else
            p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
          break;
        }
      else if (STACK_TOP_P (operands[1]))
        {
#if SYSV386_COMPAT
          p = "{\t%1, %0|r\t%0, %1}";
#else
          p = "r\t{%1, %0|%0, %1}";     /* st(r2) = st(0) op st(r2) */
#endif
        }
      else
        {
#if SYSV386_COMPAT
          p = "{r\t%2, %0|\t%0, %2}";
#else
          p = "\t{%2, %0|%0, %2}";      /* st(r1) = st(r1) op st(0) */
#endif
        }
      break;

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (insn, operands)
     rtx insn;
     rtx *operands;
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  rtx xops[4];

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (! STACK_TOP_P (operands[1]))
    abort ();

  xops[0] = GEN_INT (12);
  xops[1] = adj_offsettable_operand (operands[2], 1);
  xops[1] = change_address (xops[1], QImode, NULL_RTX);

  xops[2] = operands[0];
  if (GET_CODE (operands[0]) != MEM)
    xops[2] = operands[3];

  output_asm_insn ("fnstcw\t%2", operands);
  output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
  output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
  output_asm_insn ("fldcw\t%2", operands);
  output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);

  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z2\t%2", xops);
  else
    output_asm_insn ("fist%z2\t%2", xops);

  output_asm_insn ("fldcw\t%2", operands);

  if (GET_CODE (operands[0]) != MEM)
    {
      if (dimode_p)
        {
          split_di (operands+0, 1, xops+0, xops+1);
          split_di (operands+3, 1, xops+2, xops+3);
          output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
          output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
        }
      else if (GET_MODE (operands[0]) == SImode)
        output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
      else
        output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
    }

  return "";
}
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.  */

const char *
output_fp_compare (insn, operands, eflags_p, unordered_p)
     rtx insn;
     rtx *operands;
     int eflags_p, unordered_p;
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];

  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }

  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
         is also a stack register that dies, then this must be a
         `fcompp' float compare.  */

      if (eflags_p == 1)
        {
          /* There is no double popping fcomi variant.  Fortunately,
             eflags is immune from the fstp's cc clobbering.  */
          if (unordered_p)
            output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
          else
            output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
          return "fstp\t%y0";
        }
      else
        {
          if (eflags_p == 2)
            {
              if (unordered_p)
                return "fucompp\n\tfnstsw\t%0";
              else
                return "fcompp\n\tfnstsw\t%0";
            }
          else
            {
              if (unordered_p)
                return "fucompp";
              else
                return "fcompp";
            }
        }
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[24] =
      {
        "fcom%z1\t%y1",
        "fcomp%z1\t%y1",
        "fucom%z1\t%y1",
        "fucomp%z1\t%y1",

        "ficom%z1\t%y1",
        "ficomp%z1\t%y1",
        NULL,
        NULL,

        "fcomi\t{%y1, %0|%0, %y1}",
        "fcomip\t{%y1, %0|%0, %y1}",
        "fucomi\t{%y1, %0|%0, %y1}",
        "fucomip\t{%y1, %0|%0, %y1}",

        NULL,
        NULL,
        NULL,
        NULL,

        "fcom%z2\t%y2\n\tfnstsw\t%0",
        "fcomp%z2\t%y2\n\tfnstsw\t%0",
        "fucom%z2\t%y2\n\tfnstsw\t%0",
        "fucomp%z2\t%y2\n\tfnstsw\t%0",

        "ficom%z2\t%y2\n\tfnstsw\t%0",
        "ficomp%z2\t%y2\n\tfnstsw\t%0",
        NULL,
        NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 24)
        abort ();
      ret = alt[mask];
      if (ret == NULL)
        abort ();

      return ret;
    }
}
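/* Worked example (illustrative, not from the original sources): for a
   register fcomi compare whose stack top dies, mask = (1 << 3) | 1 = 9,
   selecting the "fcomip" alternative from the table above.  */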
/* Output assembler code to FILE to initialize basic-block profiling.

   If profile_block_flag == 2

        Output code to call the subroutine `__bb_init_trace_func'
        and pass two parameters to it.  The first parameter is
        the address of a block allocated in the object module.
        The second parameter is the number of the first basic block
        of the function.

        The name of the block is a local symbol made with this statement:

            ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);

        Of course, since you are writing the definition of
        `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
        can take a short cut in the definition of this macro and use the
        name that you know will result.

        The number of the first basic block of the function is
        passed to the macro in BLOCK_OR_LABEL.

        If described in a virtual assembler language the code to be
        output looks like:

                parameter1 <- LPBX0
                parameter2 <- BLOCK_OR_LABEL
                call __bb_init_trace_func

   else if profile_block_flag != 0

        Output code to call the subroutine `__bb_init_func'
        and pass one single parameter to it, which is the same
        as the first parameter to `__bb_init_trace_func'.

        The first word of this parameter is a flag which will be nonzero if
        the object module has already been initialized.  So test this word
        first, and do not call `__bb_init_func' if the flag is nonzero.
        Note: When profile_block_flag == 2 the test need not be done
        but `__bb_init_trace_func' *must* be called.

        BLOCK_OR_LABEL may be used to generate a label number as a
        branch destination in case `__bb_init_func' will not be called.

        If described in a virtual assembler language the code to be
        output looks like:

                cmp (LPBX0),0
                jne local_label
                parameter1 <- LPBX0
                call __bb_init_func
            local_label:
*/
void
ix86_output_function_block_profiler (file, block_or_label)
     FILE *file;
     int block_or_label;
{
  static int num_func = 0;
  rtx xops[8];
  char block_table[80], false_label[80];

  ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);

  xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
  xops[5] = stack_pointer_rtx;
  xops[7] = gen_rtx_REG (Pmode, 0); /* eax */

  CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;

  switch (profile_block_flag)
    {
    case 2:
      xops[2] = GEN_INT (block_or_label);
      xops[3] = gen_rtx_MEM (Pmode,
                     gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
      xops[6] = GEN_INT (8);

      output_asm_insn ("push{l}\t%2", xops);
      if (!flag_pic)
        output_asm_insn ("push{l}\t%1", xops);
      else
        {
          output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
          output_asm_insn ("push{l}\t%7", xops);
        }
      output_asm_insn ("call\t%P3", xops);
      output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
      break;

    default:
      ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);

      xops[0] = const0_rtx;
      xops[2] = gen_rtx_MEM (Pmode,
                             gen_rtx_SYMBOL_REF (VOIDmode, false_label));
      xops[3] = gen_rtx_MEM (Pmode,
                             gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
      xops[4] = gen_rtx_MEM (Pmode, xops[1]);
      xops[6] = GEN_INT (4);

      CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;

      output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
      output_asm_insn ("jne\t%2", xops);

      if (!flag_pic)
        output_asm_insn ("push{l}\t%1", xops);
      else
        {
          output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
          output_asm_insn ("push{l}\t%7", xops);
        }
      output_asm_insn ("call\t%P3", xops);
      output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
      ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
      break;
    }

  num_func++;
}
/* Output assembler code to FILE to increment a counter associated
   with basic block number BLOCKNO.

   If profile_block_flag == 2

        Output code to initialize the global structure `__bb' and
        call the function `__bb_trace_func' which will increment the
        counter.

        `__bb' consists of two words.  In the first word the number
        of the basic block has to be stored.  In the second word
        the address of a block allocated in the object module
        has to be stored.

        The basic block number is given by BLOCKNO.

        The address of the block is given by the label created with

            ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);

        by FUNCTION_BLOCK_PROFILER.

        Of course, since you are writing the definition of
        `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
        can take a short cut in the definition of this macro and use the
        name that you know will result.

        If described in a virtual assembler language the code to be
        output looks like:

                move BLOCKNO -> (__bb)
                move LPBX0 -> (__bb+4)
                call __bb_trace_func

        Note that function `__bb_trace_func' must not change the
        machine state, especially the flag register.  To grant
        this, you must output code to save and restore registers
        either in this macro or in the macros MACHINE_STATE_SAVE
        and MACHINE_STATE_RESTORE.  The last two macros will be
        used in the function `__bb_trace_func', so you must make
        sure that the function prologue does not change any
        register prior to saving it with MACHINE_STATE_SAVE.

   else if profile_block_flag != 0

        Output code to increment the counter directly.
        Basic blocks are numbered separately from zero within each
        compiled object module.  The count associated with block number
        BLOCKNO is at index BLOCKNO in an array of words; the name of
        this array is a local symbol made with this statement:

            ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);

        Of course, since you are writing the definition of
        `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
        can take a short cut in the definition of this macro and use the
        name that you know will result.

        If described in a virtual assembler language the code to be
        output looks like:

                inc (LPBX2+4*BLOCKNO)
*/
void
ix86_output_block_profiler (file, blockno)
     FILE *file ATTRIBUTE_UNUSED;
     int blockno;
{
  rtx xops[8], cnt_rtx;
  char counts[80];
  char *block_table = counts;

  switch (profile_block_flag)
    {
    case 2:
      ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);

      xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
      xops[2] = GEN_INT (blockno);
      xops[3] = gen_rtx_MEM (Pmode,
                             gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
      xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
      xops[5] = plus_constant (xops[4], 4);
      xops[0] = gen_rtx_MEM (SImode, xops[4]);
      xops[6] = gen_rtx_MEM (SImode, xops[5]);

      CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;

      output_asm_insn ("pushf", xops);
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      if (flag_pic)
        {
          xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
          output_asm_insn ("push{l}\t%7", xops);
          output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
          output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
          output_asm_insn ("pop{l}\t%7", xops);
        }
      else
        output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
      output_asm_insn ("call\t%P3", xops);
      output_asm_insn ("popf", xops);
      break;

    default:
      ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
      cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
      SYMBOL_REF_FLAG (cnt_rtx) = TRUE;

      if (blockno)
        cnt_rtx = plus_constant (cnt_rtx, blockno * 4);

      if (flag_pic)
        cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);

      xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
      output_asm_insn ("inc{l}\t%0", xops);
      break;
    }
}
void
ix86_expand_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  int strict = (reload_in_progress || reload_completed);
  rtx insn;

  if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
    {
      /* Emit insns to move operands[1] into operands[0].  */

      if (GET_CODE (operands[0]) == MEM)
        operands[1] = force_reg (Pmode, operands[1]);
      else
        {
          rtx temp = operands[0];
          if (GET_CODE (temp) != REG)
            temp = gen_reg_rtx (Pmode);
          temp = legitimize_pic_address (operands[1], temp);
          if (temp == operands[0])
            return;
          operands[1] = temp;
        }
    }
  else
    {
      if (GET_CODE (operands[0]) == MEM
          && (GET_MODE (operands[0]) == QImode
              || !push_operand (operands[0], mode))
          && GET_CODE (operands[1]) == MEM)
        operands[1] = force_reg (mode, operands[1]);

      if (push_operand (operands[0], mode)
          && ! general_no_elim_operand (operands[1], mode))
        operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (FLOAT_MODE_P (mode))
        {
          /* If we are loading a floating point constant to a register,
             force the value to memory now, since we'll get better code
             out the back end.  */

          if (strict)
            ;
          else if (GET_CODE (operands[1]) == CONST_DOUBLE
                   && register_operand (operands[0], mode))
            operands[1] = validize_mem (force_const_mem (mode, operands[1]));
        }
    }

  insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);

  emit_insn (insn);
}
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators.  */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
          || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
        matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
               && rtx_equal_p (dst, src2))
        matching_memory = 2;
      else
        dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
        src2 = force_reg (mode, src2);
      else
        src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE.  */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
        dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
        src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
        src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
        abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[3];
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutable, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
            || (GET_RTX_CLASS (code) == 'c'
                && rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutable and the source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != 'c'
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;

  return 1;
}
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
        matching_memory = 1;
      else
        dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE.  */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
        dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
        src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  */
      if (code != NOT)
        abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (code, mode, operands)
     enum rtx_code code ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[2] ATTRIBUTE_UNUSED;
{
  /* If one of operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (insn, req_mode)
     rtx insn;
     enum machine_mode req_mode;
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
          && (req_mode != CCmode
              || XEXP (SET_SRC (set), 1) != const0_rtx))
        return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
        return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
        return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
        return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
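/* Example (illustrative, not from the original sources): for
   (eq (reg:SI 0) (const_int 0)) the code above emits

     (set (reg:CCZ flags) (compare:CCZ (reg:SI 0) (const_int 0)))

   and returns (eq (reg:CCZ flags) (const_int 0)) for the branch or
   setcc expander to consume.  */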
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (code)
     enum rtx_code code ATTRIBUTE_UNUSED;
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
enum machine_mode
ix86_cc_mode (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:                    /* ZF=0 */
    case NE:                    /* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:                   /* CF=0 */
    case GTU:                   /* CF=0 & ZF=0 */
    case LTU:                   /* CF=1 */
    case LEU:                   /* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:                    /* SF=OF   or   SF=0 */
    case LT:                    /* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
        return CCGOCmode;
      else
        /* For other cases Carry flag is not required.  */
        return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:                    /* ZF=0 & SF=OF */
    case LE:                    /* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
        return CCNOmode;
      else
        return CCGCmode;
    default:
      abort ();
    }
}
/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (code)
     enum rtx_code code ATTRIBUTE_UNUSED;
{
  enum rtx_code swapped_code = swap_condition (code);
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
          || (ix86_fp_comparison_cost (swapped_code)
              == ix86_fp_comparison_fcomi_cost (swapped_code)));
}
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (code, pop0, pop1)
     enum rtx_code code;
     rtx *pop0, *pop1;
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (fpcmp_mode == CCFPUmode
      || op_mode == XFmode
      || op_mode == TFmode
      || ix86_use_fcomi_compare (code))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op0) == REG || !reload_completed))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */
static enum rtx_code
ix86_fp_compare_code_to_integer (code)
     enum rtx_code code;
/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is comparison code for branch that will
   branch around FIRST_CODE and SECOND_CODE.  If one of the branches
   is not required, its value is set to NIL.
   We never require more than two branches.  */

static void
ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
     enum rtx_code code, *bypass_code, *first_code, *second_code;
{
  *bypass_code = NIL;
  *first_code = code;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF PF CF
     >      0  0  0
     <      0  0  1
     =      1  0  0
     un     1  1  1 */

  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  if (!TARGET_IEEE_FP)
    {
      *second_code = NIL;
      *bypass_code = NIL;
    }
}
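/* Illustrative sketch, added by the editor (not original source): under
   TARGET_IEEE_FP a trap-unsafe code such as LT is emitted as

	jp	around		; *bypass_code == UNORDERED
	jb	target		; *first_code  == UNLT
     around:

   while NE needs a second taken branch instead of a bypass:

	jne	target		; *first_code  == LTGT
	jp	target		; *second_code == UNORDERED  */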
/* Return cost of comparison done fcom + arithmetics operations on AX.
   All following functions use the number of instructions as the cost metric.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */

static int
ix86_fp_comparison_arithmetics_cost (code)
     enum rtx_code code;
{
  if (!TARGET_IEEE_FP)
    return 4;
  /* The cost of code output by ix86_expand_fp_compare.  */
/* Return cost of comparison done using fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */

static int
ix86_fp_comparison_fcomi_cost (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not supported - this
     prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != NIL || second_code != NIL) + 2;
}
/* Return cost of comparison done using sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */

static int
ix86_fp_comparison_sahf_cost (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not preferred - this
     keeps gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != NIL || second_code != NIL) + 3;
}
/* Compute cost of the comparison done using any method.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */

static int
ix86_fp_comparison_cost (code)
     enum rtx_code code;
{
  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
  int min;

  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
  sahf_cost = ix86_fp_comparison_sahf_cost (code);

  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
  if (min > sahf_cost)
    min = sahf_cost;
  if (min > fcomi_cost)
    min = fcomi_cost;
  return min;
}
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

rtx
ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
     enum rtx_code code;
     rtx op0, op1, scratch;
     rtx *second_test, *bypass_test;
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
	{
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			     tmp);
	  emit_insn (tmp);
	}
      else
	{
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
	  emit_insn (gen_x86_sahf_1 (scratch));
	}

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
      if (second_code != NIL)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  abort ();
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
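/* Editor's note (an addition, not original source): the magic constants
   above test bits of the FPU status word after it has been copied into AH:
   C0 is 0x01, C2 is 0x04 and C3 is 0x40, so 0x45 masks all three condition
   bits, 0x05 is C0|C2, 0x44 is C2|C3, and 0x04 alone is C2, the bit that
   flags an unordered result.  */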
static rtx
ix86_expand_compare (code, second_test, bypass_test)
     enum rtx_code code;
     rtx *second_test, *bypass_test;
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    ret = ix86_expand_fp_compare (code, op0, op1, gen_reg_rtx (HImode),
				  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}
void
ix86_expand_branch (code, label)
     enum rtx_code code;
     rtx label;
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
    case TFmode:
      /* Don't expand the comparison early, so that we get better code
	 when jump or whoever decides to reverse the comparison.  */
      {
	rtvec vec;
	int use_fcomi;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	tmp = gen_rtx_fmt_ee (code, VOIDmode,
			      ix86_compare_op0, ix86_compare_op1);
	tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				    gen_rtx_LABEL_REF (VOIDmode, label),
				    pc_rtx);
	tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	use_fcomi = ix86_use_fcomi_compare (code);
	vec = rtvec_alloc (3 + !use_fcomi);
	RTVEC_ELT (vec, 0) = tmp;
	RTVEC_ELT (vec, 1)
	  = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	RTVEC_ELT (vec, 2)
	  = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	if (! use_fcomi)
	  RTVEC_ELT (vec, 3)
	    = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	return;
      }

    case DImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = NIL; code2 = NE;  break;
	  case NE:   code2 = NIL; break;

	  default:
	    abort ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != NIL)
	  ix86_expand_branch (code1, label);
	if (code2 != NIL)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != NIL)
	  emit_label (label2);
	return;
      }

    default:
      abort ();
    }
}
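/* Worked example (added, not from the original source): for a DImode
   "a == b" the xor/ior path above reduces the test to one SImode compare:

	xorl	hi(b), hi(a)
	xorl	lo(b), lo(a)
	orl	hi-result, lo-result	; zero iff a == b
	jz	label  */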
/* Split branch based on floating point condition.  */
void
ix86_split_fp_branch (condition, op1, op2, target1, target2, tmp)
     rtx condition, op1, op2, target1, target2, tmp;
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  enum rtx_code code = GET_CODE (condition);

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     bypass,
					     gen_rtx_LABEL_REF (VOIDmode,
								label),
					     pc_rtx)));
    }
  /* AMD Athlon and probably other CPUs too have fast bypass path between the
     comparison and first branch.  The second branch takes longer to execute,
     so if possible place the branch with the worse-predictable condition
     first.  */
  if (second != NULL_RTX
      && (GET_CODE (second) == UNORDERED || GET_CODE (second) == ORDERED))
    {
      rtx tmp = condition;
      condition = second;
      second = tmp;
    }
  emit_jump_insn (gen_rtx_SET
		  (VOIDmode, pc_rtx,
		   gen_rtx_IF_THEN_ELSE (VOIDmode,
					 condition, target1, target2)));
  if (second != NULL_RTX)
    emit_jump_insn (gen_rtx_SET
		    (VOIDmode, pc_rtx,
		     gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, target2)));
  if (label != NULL_RTX)
    emit_label (label);
}
int
ix86_expand_setcc (code, dest)
     enum rtx_code code;
     rtx dest;
{
  rtx ret, tmp, tmpreg;
  rtx second_test, bypass_test;
  int type;

  if (GET_MODE (ix86_compare_op0) == DImode)
    return 0; /* FAIL */

  /* Three modes of generation:
     0 -- destination does not overlap compare sources:
	  clear dest first, emit strict_low_part setcc.
     1 -- destination does overlap compare sources:
	  emit subreg setcc, zero extend.
     2 -- destination is in QImode:
	  emit setcc only.  */

  type = 0;
  if (GET_MODE (dest) == QImode)
    type = 2;
  else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
	   || reg_overlap_mentioned_p (dest, ix86_compare_op1))
    type = 1;

  if (type == 0)
    emit_move_insn (dest, const0_rtx);

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;
  if (type == 0)
    {
      tmp = gen_lowpart (QImode, dest);
      tmpreg = tmp;
      tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
    }
  else if (type == 1)
    {
      if (!cse_not_expected)
	tmp = gen_reg_rtx (QImode);
      else
	tmp = gen_lowpart (QImode, dest);
      tmpreg = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  if (second_test)
	    abort ();
	  test = bypass_test;
	  bypass = 1;
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  if (type == 1)
    {
      rtx clob;

      tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
      tmp = gen_rtx_SET (VOIDmode, dest, tmp);
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
      emit_insn (tmp);
    }

  return 1; /* DONE */
}
int
ix86_expand_int_movcc (operands)
     rtx operands[];
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;

  /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
     In case comparison is done with immediate, we can convert it to LTU or
     GEU by altering the integer.  */

  if ((code == LEU || code == GTU)
      && GET_CODE (ix86_compare_op1) == CONST_INT
      && GET_MODE (operands[0]) != HImode
      && (unsigned int) INTVAL (ix86_compare_op1) != 0xffffffff
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      if (code == LEU)
	code = LTU;
      else
	code = GEU;
      ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
    }

  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = gen_sequence ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if (GET_MODE (operands[0]) != HImode
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      if ((compare_code == LTU || compare_code == GEU)
	  && !second_test && !bypass_test)
	{
	  /* Detect overlap between destination and compare sources.  */
	  rtx tmp = out;

	  /* To simplify rest of code, restrict to the GEU case.  */
	  if (compare_code == LTU)
	    {
	      HOST_WIDE_INT tmp = ct;
	      ct = cf;
	      cf = tmp;
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	  diff = ct - cf;

	  if (reg_overlap_mentioned_p (out, ix86_compare_op0)
	      || reg_overlap_mentioned_p (out, ix86_compare_op1))
	    tmp = gen_reg_rtx (SImode);

	  emit_insn (compare_seq);
	  emit_insn (gen_x86_movsicc_0_m1 (tmp));

	  if (diff == 1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [addl dest, ct]
	       *
	       * Size 5 - 8.
	       */
	      if (ct)
		emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
	    }
	  else if (cf == -1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * orl $ct, dest
	       *
	       * Size 8.
	       */
	      emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
	    }
	  else if (diff == -1 && ct)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * notl dest
	       * [addl dest, cf]
	       *
	       * Size 8 - 11.
	       */
	      emit_insn (gen_one_cmplsi2 (tmp, tmp));
	      if (cf)
		emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
	    }
	  else
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * andl cf - ct, dest
	       * [addl dest, ct]
	       *
	       * Size 8 - 11.
	       */
	      emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
	      if (ct)
		emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
	    }

	  if (tmp != out)
	    emit_move_insn (out, tmp);

	  return 1; /* DONE */
	}
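      /* Added illustration (not original source): gen_x86_movsicc_0_m1
	 emits "sbbl %reg,%reg", which materializes the carry flag produced
	 by the compare as an all-ones (-1) or all-zero value without any
	 branch; the add/or/and fixups above then turn that mask into the
	 requested ct/cf pair.  For instance, ct = 4, cf = 3 gives diff == 1
	 and the three-insn sequence "cmpl; sbbl; addl $4".  */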
      diff = ct - cf;
      if (diff < 0)
	{
	  HOST_WIDE_INT tmp;
	  tmp = ct, ct = cf, cf = tmp;
	  diff = -diff;
	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
	    {
	      /* We may be reversing unordered compare to normal compare, that
		 is not valid in general (we may convert non-trapping condition
		 to trapping one), however on i386 we currently emit all
		 comparisons unordered.  */
	      compare_code = reverse_condition_maybe_unordered (compare_code);
	      code = reverse_condition_maybe_unordered (code);
	    }
	  else
	    {
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	}
      if (diff == 1 || diff == 2 || diff == 4 || diff == 8
	  || diff == 3 || diff == 5 || diff == 9)
	{
	  /*
	   * xorl dest,dest
	   * cmpl op1,op2
	   * setcc dest
	   * lea cf(dest*(ct-cf)),dest
	   *
	   * Size 14.
	   *
	   * This also catches the degenerate setcc-only case.
	   */

	  rtx tmp;
	  int nops;

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  nops = 0;
	  if (diff == 1)
	    tmp = out;
	  else
	    {
	      tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
	      nops++;
	      if (diff & 1)
		{
		  tmp = gen_rtx_PLUS (SImode, tmp, out);
		  nops++;
		}
	    }
	  if (cf != 0)
	    {
	      tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
	      nops++;
	    }
	  if (tmp != out)
	    {
	      if (nops == 0)
		emit_move_insn (out, tmp);
	      else if (nops == 1)
		{
		  rtx clob;

		  clob = gen_rtx_REG (CCmode, FLAGS_REG);
		  clob = gen_rtx_CLOBBER (VOIDmode, clob);

		  tmp = gen_rtx_SET (VOIDmode, out, tmp);
		  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
		  emit_insn (tmp);
		}
	      else
		emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
	    }
	  if (out != operands[0])
	    emit_move_insn (operands[0], out);

	  return 1; /* DONE */
	}
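      /* Added illustration (not original source): for ct = 7, cf = 2 we
	 have diff == 5, so the setcc result x (0 or 1) is expanded as
	 "leal 2(x,x,4), dest" -- a single lea computes x*5 + 2, which is
	 2 when the condition is false and 7 when it is true.  */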
      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * Size 20.			Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       *
       * %%% Parameterize branch_cost on the tuning architecture, then
       * use that.  The 80386 couldn't care less about mispredicts.
       */

      if (!optimize_size && !TARGET_CMOVE)
	{
	  if (ct == 0)
	    {
	      ct = cf;
	      cf = 0;
	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
		{
		  /* We may be reversing unordered compare to normal compare,
		     that is not valid in general (we may convert non-trapping
		     condition to trapping one), however on i386 we currently
		     emit all comparisons unordered.  */
		  compare_code
		    = reverse_condition_maybe_unordered (compare_code);
		  code = reverse_condition_maybe_unordered (code);
		}
	      else
		{
		  compare_code = reverse_condition (compare_code);
		  code = reverse_condition (code);
		}
	    }

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  emit_insn (gen_addsi3 (out, out, constm1_rtx));
	  emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
	  if (ct != 0)
	    emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
	  if (out != operands[0])
	    emit_move_insn (operands[0], out);

	  return 1; /* DONE */
	}
    }
  if (!TARGET_CMOVE)
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (optimize_size)
	return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load a
	 constant with the above and mask it in with a logical operation.  */

      if (GET_CODE (operands[2]) == CONST_INT)
	{
	  var = operands[3];
	  if (INTVAL (operands[2]) == 0)
	    operands[3] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[2]) == -1)
	    operands[3] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else if (GET_CODE (operands[3]) == CONST_INT)
	{
	  var = operands[2];
	  if (INTVAL (operands[3]) == 0)
	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1)
	    operands[2] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else
	return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (GET_MODE (orig_out));
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
	return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
			  OPTAB_WIDEN);
      if (out != orig_out)
	emit_move_insn (orig_out, out);

      return 1; /* DONE */
    }
  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
    operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
  if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
    operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);

  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						compare_op, operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  second_test,
						  operands[2],
						  operands[0])));

  return 1; /* DONE */
}
int
ix86_expand_fp_movcc (operands)
     rtx operands[];
{
  enum rtx_code code;
  rtx tmp;
  rtx compare_op, second_test, bypass_test;

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* The floating point conditional move instructions don't directly
     support signed integer comparisons.  */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      if (second_test != NULL || bypass_test != NULL)
	abort ();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						compare_op,
						operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  second_test,
						  operands[2],
						  operands[0])));

  return 1;
}
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  int size = mode == TFmode ? 3 : GET_MODE_SIZE (mode) / 4;

  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp moves,
     that force all constants to memory to allow combining.  */

  if (GET_CODE (operand) == MEM
      && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
    operand = get_pool_constant (XEXP (operand, 0));

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      PUT_MODE (operand, SImode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      PUT_MODE (operand, SImode);
	      parts[0] = operand;
	      parts[1] = adj_offsettable_operand (operand, 4);
	      if (size == 3)
		parts[2] = adj_offsettable_operand (operand, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		case TFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = GEN_INT (l[2]);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = GEN_INT (l[1]);
	      parts[0] = GEN_INT (l[0]);
	    }
	  else
	    abort ();
	}
    }

  return size;
}
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

int
ix86_split_long_move (operands1)
     rtx operands1[];
{
  rtx part[2][3];
  rtx operands[2];
  int size;
  int push = 0;
  int collisions = 0;

  /* Make our own copy to avoid clobbering the operands.  */
  operands[0] = copy_rtx (operands1[0]);
  operands[1] = copy_rtx (operands1[1]);

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  size = ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
  ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (size == 3)
	part[1][1] = part[1][2];
      part[1][0] = part[1][1];
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (size == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && size == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  collisions = 1;
	  emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
				  XEXP (part[1][0], 0)));
	  part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
	  part[1][1] = adj_offsettable_operand (part[1][0], 4);
	  if (size == 3)
	    part[1][2] = adj_offsettable_operand (part[1][0], 8);
	}
    }

  if (push)
    {
      if (size == 3)
	{
	  /* We use only first 12 bytes of TFmode value, but for pushing we
	     are required to adjust stack as if we were pushing real 16byte
	     value.  */
	  if (GET_MODE (operands1[0]) == TFmode)
	    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (-4)));
	  emit_insn (gen_push (part[1][2]));
	}
      emit_insn (gen_push (part[1][1]));
      emit_insn (gen_push (part[1][0]));
      return 1;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (size == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      if (size == 3)
	{
	  operands1[2] = part[0][2];
	  operands1[3] = part[0][1];
	  operands1[4] = part[0][0];
	  operands1[5] = part[1][2];
	  operands1[6] = part[1][1];
	  operands1[7] = part[1][0];
	}
      else
	{
	  operands1[2] = part[0][1];
	  operands1[3] = part[0][0];
	  operands1[5] = part[1][1];
	  operands1[6] = part[1][0];
	}
    }
  else
    {
      if (size == 3)
	{
	  operands1[2] = part[0][0];
	  operands1[3] = part[0][1];
	  operands1[4] = part[0][2];
	  operands1[5] = part[1][0];
	  operands1[6] = part[1][1];
	  operands1[7] = part[1][2];
	}
      else
	{
	  operands1[2] = part[0][0];
	  operands1[3] = part[0][1];
	  operands1[5] = part[1][0];
	  operands1[6] = part[1][1];
	}
    }

  return 0;
}
void
ix86_split_ashldi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
void
ix86_split_ashrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);

	  if (! reload_completed)
	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
	  else
	    {
	      emit_move_insn (high[0], low[0]);
	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
	    }

	  if (count > 32)
	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = gen_reg_rtx (SImode);
	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}
void
ix86_split_lshrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
	not aligned, otherwise undefined

   This is just the body.  It needs the initialisations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

void
ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
     rtx out, align_rtx, scratch;
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   SImode, 1, 0, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
				   SImode, 1, 0, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
				   SImode, 1, 0, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check whether it is aligned to a 4-byte boundary.  */

	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   SImode, 1, 0, align_4_label);
	}

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, 0, end_0_label);

      /* Increment the address.  */
      emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
				   QImode, 1, 0, end_0_label);

	  emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, 0, end_0_label);

      emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It only makes programs bigger and does not help
     speed.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
			   SImode, 1, 0, align_4_label);
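  /* Added worked example (not original source): with the word
     scratch = 0x61006261 ("a", NUL, "b", "a" from the top byte down),

	tmpreg	= 0x61006261 + 0xfefefeff = 0x5fff6160
	scratch	= ~0x61006261		  = 0x9eff9d9e
	tmpreg &= scratch		  = 0x1eff0100
	tmpreg &= 0x80808080		  = 0x00800000

     so bit 0x80 survives exactly in the byte position that held zero and
     the jump back to align_4_label is not taken.  */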
  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg,
			      gen_rtx_PLUS (SImode, out, GEN_INT (2))));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));

  emit_label (end_0_label);
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static void
ix86_init_machine_status (p)
     struct function *p;
{
  p->machine = (struct machine_function *)
    xcalloc (1, sizeof (struct machine_function));
}

/* Mark machine specific bits of P for GC.  */
static void
ix86_mark_machine_status (p)
     struct function *p;
{
  struct machine_function *machine = p->machine;
  enum machine_mode mode;
  int n;

  if (machine == NULL)
    return;

  for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
       mode = (enum machine_mode) ((int) mode + 1))
    for (n = 0; n < MAX_386_STACK_LOCALS; n++)
      ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
}

static void
ix86_free_machine_status (p)
     struct function *p;
{
  free (p->machine);
  p->machine = NULL;
}
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (mode, n)
     enum machine_mode mode;
     int n;
{
  if (n < 0 || n >= MAX_386_STACK_LOCALS)
    abort ();

  if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
    ix86_stack_locals[(int) mode][n]
      = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  return ix86_stack_locals[(int) mode][n];
}
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

static int
memory_address_length (addr)
     rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* Special cases: ebp and esp need the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
	    len = 1;
	  else
	    len = 4;
	}

      /* An index requires the two-byte modrm form.  */
      if (index)
	len += 1;
    }

  return len;
}
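/* Added examples (not part of the original source): "(%ecx)" costs 0 extra
   bytes, "(%esp)" costs 1 (the special-cased two-byte form), "8(%ecx)"
   costs 1 for the disp8, a direct address costs 4, and "8(%ecx,%edx,2)"
   costs 2: one for the disp8 plus one for the SIB byte.  */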
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */
int
ix86_attr_length_immediate_default (insn, shortform)
     rtx insn;
     int shortform;
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	if (len)
	  abort ();
	if (shortform
	    && GET_CODE (recog_data.operand[i]) == CONST_INT
	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
	  len = 1;
	else
	  {
	    switch (get_attr_mode (insn))
	      {
	      case MODE_QI:
		len = 1;
		break;
	      case MODE_HI:
		len = 2;
		break;
	      case MODE_SI:
		len = 4;
		break;
	      default:
		fatal_insn ("Unknown insn mode", insn);
	      }
	  }
      }
  return len;
}

/* Compute default value for "length_address" attribute.  */
int
ix86_attr_length_address_default (insn)
     rtx insn;
{
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      return memory_address_length (XEXP (recog_data.operand[i], 0));
  return 0;
}
/* Return the maximum number of instructions a cpu can issue.  */
int
ix86_issue_rate ()
{
  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_ATHLON:
      return 3;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

static int
ix86_flags_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return 0;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
	   && XVECLEN (PATTERN (dep_insn), 0) == 2
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return 0;

  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
    return 0;

  /* This test is true if the dependant insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return 0;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return 0;

  return 1;
}
/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx addr;

  if (insn_type == TYPE_LEA)
    {
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
	;
      else if (GET_CODE (addr) == PARALLEL
	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      else
	abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    goto found;
	  }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}
int
ix86_adjust_cost (insn, link, dep_insn, cost)
     rtx insn, link, dep_insn;
     int cost;
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  /* Prologue and epilogue allocators can have a false dependency on ebp.
     This results in one cycle extra stall on Pentium prologue scheduling,
     so handle this important case manually.  */
  if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
      && dep_insn_type == TYPE_ALU
      && !reg_mentioned_p (stack_pointer_rtx, insn))
    return 0;

  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
	  && dep_insn_type != TYPE_FMOV
	  && ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
	      || memory == MEMORY_BOTH))
	cost += 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && GET_CODE (SET_DEST (set2)) == MEM)
	cost += 1;
      break;

    case PROCESSOR_K6:
      /* The esp dependency is resolved before the instruction is really
         finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
	  || memory == MEMORY_BOTH)
	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;
      break;

    case PROCESSOR_ATHLON:
      if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
	  || memory == MEMORY_BOTH)
	{
	  if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
	    cost += 2;
	  else
	    cost += 3;
	}
      break;

    default:
      break;
    }

  return cost;
}
static union
{
  struct ppro_sched_data
  {
    rtx decode[3];
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;

static int
ix86_safe_length (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_length (insn);
  else
    return 128;
}

static int
ix86_safe_length_prefix (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_length (insn);
  else
    return 0;
}

static enum attr_memory
ix86_safe_memory (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_memory (insn);
  else
    return MEMORY_UNKNOWN;
}

static enum attr_pent_pair
ix86_safe_pent_pair (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_pent_pair (insn);
  else
    return PENT_PAIR_NP;
}

static enum attr_ppro_uops
ix86_safe_ppro_uops (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_ppro_uops (insn);
  else
    return PPRO_UOPS_MANY;
}

static void
ix86_dump_ppro_packet (dump)
     FILE *dump;
{
  if (ix86_sched_data.ppro.decode[0])
    {
      fprintf (dump, "PPRO packet: %d",
	       INSN_UID (ix86_sched_data.ppro.decode[0]));
      if (ix86_sched_data.ppro.decode[1])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
      if (ix86_sched_data.ppro.decode[2])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
      fputc ('\n', dump);
    }
}
6957 ix86_sched_init (dump
, sched_verbose
)
6958 FILE *dump ATTRIBUTE_UNUSED
;
6959 int sched_verbose ATTRIBUTE_UNUSED
;
6961 memset (&ix86_sched_data
, 0, sizeof (ix86_sched_data
));
6964 /* Shift INSN to SLOT, and shift everything else down. */
6967 ix86_reorder_insn (insnp
, slot
)
6974 insnp
[0] = insnp
[1];
6975 while (++insnp
!= slot
);
6980 /* Find an instruction with given pairability and minimal amount of cycles
6981 lost by the fact that the CPU waits for both pipelines to finish before
6982 reading next instructions. Also take care that both instructions together
6983 can not exceed 7 bytes. */
6986 ix86_pent_find_pair (e_ready
, ready
, type
, first
)
6989 enum attr_pent_pair type
;
6992 int mincycles
, cycles
;
6993 enum attr_pent_pair tmp
;
6994 enum attr_memory memory
;
6995 rtx
*insnp
, *bestinsnp
= NULL
;
6997 if (ix86_safe_length (first
) > 7 + ix86_safe_length_prefix (first
))
7000 memory
= ix86_safe_memory (first
);
7001 cycles
= result_ready_cost (first
);
7002 mincycles
= INT_MAX
;
7004 for (insnp
= e_ready
; insnp
>= ready
&& mincycles
; --insnp
)
7005 if ((tmp
= ix86_safe_pent_pair (*insnp
)) == type
7006 && ix86_safe_length (*insnp
) <= 7 + ix86_safe_length_prefix (*insnp
))
7008 enum attr_memory second_memory
;
7009 int secondcycles
, currentcycles
;
7011 second_memory
= ix86_safe_memory (*insnp
);
7012 secondcycles
= result_ready_cost (*insnp
);
7013 currentcycles
= abs (cycles
- secondcycles
);
7015 if (secondcycles
>= 1 && cycles
>= 1)
7017 /* Two read/modify/write instructions together takes two
7019 if (memory
== MEMORY_BOTH
&& second_memory
== MEMORY_BOTH
)
7022 /* Read modify/write instruction followed by read/modify
7023 takes one cycle longer. */
7024 if (memory
== MEMORY_BOTH
&& second_memory
== MEMORY_LOAD
7025 && tmp
!= PENT_PAIR_UV
7026 && ix86_safe_pent_pair (first
) != PENT_PAIR_UV
)
7029 if (currentcycles
< mincycles
)
7030 bestinsnp
= insnp
, mincycles
= currentcycles
;
/* Subroutines of ix86_sched_reorder.  */

static void
ix86_sched_reorder_pentium (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  enum attr_pent_pair pair1, pair2;
  rtx *insnp;

  /* This wouldn't be necessary if Haifa knew that static insn ordering
     is important to which pipe an insn is issued to.  So we have to make
     some minor rearrangements.  */

  pair1 = ix86_safe_pent_pair (*e_ready);

  /* If the first insn is non-pairable, let it be.  */
  if (pair1 == PENT_PAIR_NP)
    return;

  pair2 = PENT_PAIR_NP;
  insnp = 0;

  /* If the first insn is UV or PV pairable, search for a PU
     insn to go with.  */
  if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PU, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PU;
    }

  /* If the first insn is PU or UV pairable, search for a PV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP
      && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PV;
    }

  /* If the first insn is pairable, search for a UV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP)
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_UV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_UV;
    }

  if (pair2 == PENT_PAIR_NP)
    return;

  /* Found something!  Decide if we need to swap the order.  */
  if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
      || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
	  && ix86_safe_memory (*e_ready) == MEMORY_BOTH
	  && ix86_safe_memory (*insnp) == MEMORY_LOAD))
    ix86_reorder_insn (insnp, e_ready);
  else
    ix86_reorder_insn (insnp, e_ready - 1);
}
static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready >= e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */
int
ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int n_ready;
     int clock_var ATTRIBUTE_UNUSED;
{
  rtx *e_ready = ready + n_ready - 1;

  if (n_ready < 2)
    goto out;

  switch (ix86_cpu)
    {
    default:
      break;

    case PROCESSOR_PENTIUM:
      ix86_sched_reorder_pentium (ready, e_ready);
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

 out:
  return ix86_issue_rate ();
}
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (exp, align)
     tree exp;
     int align;
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
	   && align < 256)
    return 256;

  return align;
}
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (type, align)
     tree type;
     int align;
{
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
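
/* TYPE_SIZE is counted in bits, so the aggregate test above gives any
   static object of 256 bits (32 bytes) or more a 256-bit alignment; e.g.
   a file-scope "char buf[64];" ends up 32-byte aligned, which suits both
   block moves and vector accesses.  */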
/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (type, align)
     tree type;
     int align;
{
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
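
/* This mirrors ix86_data_alignment except for the big-aggregate rule,
   presumably because stack slots cannot be promised such large
   alignment.  */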
#define def_builtin(NAME, TYPE, CODE) \
  builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL_PTR)

struct builtin_description
{
  enum insn_code icode;
  const char *name;
  enum ix86_builtins code;
  enum rtx_code comparison;
  unsigned int flag;
};
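
/* Each entry in the tables below supplies the pattern to expand (icode),
   the user-visible name (0 for entries that are registered by hand with
   more precise types), the IX86_BUILTIN_* code, and, for compares, the
   rtx comparison to emit plus a flag requesting an operand swap.  */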
static struct builtin_description bdesc_comi[] =
{
  { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
};
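
/* So a call such as

     int gt = __builtin_ia32_comigt (a, b);

   with A and B of V4SF type reaches ix86_expand_sse_comi below with
   comparison == LT and flag == 1: the operands are swapped and a single
   comi pattern plus a setcc is emitted.  */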
static struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },

  { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },

  { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  */
  { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },

  { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
};
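
/* All named entries above are registered generically by the bdesc_2arg
   loop in ix86_init_builtins; e.g. the CODE_FOR_addv4sf3 entry becomes

     v4sf __builtin_ia32_addps (v4sf, v4sf)

   and is expanded by ix86_expand_binop_builtin.  */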
static struct builtin_description bdesc_1arg[] =
{
  { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
};
/* Expand all the target specific builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */

void
ix86_init_builtins ()
{
  struct builtin_description * d;
  size_t i;
  tree endlink = void_list_node;

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE,
						 V4SF_type_node,
						 endlink)));
  tree v4si_ftype_v4sf_v4sf
    = build_function_type (V4SI_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE,
						 V4SF_type_node,
						 endlink)));
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE,
						 integer_type_node,
						 endlink)));
  tree int_ftype_v4sf
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      endlink));
  tree int_ftype_v8qi
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));
  tree int_ftype_v2si
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree v2si_ftype_int
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, integer_type_node,
				      endlink));
  tree v4sf_ftype_v4sf_int
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v4sf_ftype_v4sf_v2si
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree int_ftype_v4hi_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_int_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v4hi_ftype_v2si_v2si
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v4hi_ftype_v8qi_v8qi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v2si_ftype_v4hi_v4hi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree di_ftype_di_int
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v8qi_ftype_v8qi_di
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_di
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree v2si_ftype_v2si_di
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree void_ftype_void
    = build_function_type (void_type_node, endlink);
  tree void_ftype_pchar_int
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pchar_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree void_ftype_unsigned
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, unsigned_type_node,
				      endlink));
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, endlink);
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree ti_ftype_void
    = build_function_type (intTI_type_node, endlink);
  tree v2si_ftype_v4sf
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      endlink));

  tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
				  tree_cons (NULL_TREE, V8QI_type_node,
					     tree_cons (NULL_TREE,
							pchar_type_node,
							endlink)));
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type (void_type_node, maskmovq_args);
  tree v4sf_ftype_pfloat
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, pfloat_type_node,
				      endlink));
  tree v4sf_ftype_float
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, float_type_node,
				      endlink));
  tree v4sf_ftype_float_float_float_float
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, float_type_node,
				      tree_cons (NULL_TREE, float_type_node,
						 tree_cons (NULL_TREE,
							    float_type_node,
							    tree_cons (NULL_TREE,
								       float_type_node,
								       endlink)))));
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, pv2si_type_node,
						 endlink)));
  tree v4sf_ftype_pv2si_v4sf
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, pv2si_type_node,
						 endlink)));
  tree void_ftype_pfloat_v4sf
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pfloat_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 endlink)));
  tree void_ftype_pdi_di
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pdi_type_node,
				      tree_cons (NULL_TREE,
						 long_long_unsigned_type_node,
						 endlink)));
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      endlink));

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 endlink)));
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree ti_ftype_ti_ti
    = build_function_type (intTI_type_node,
			   tree_cons (NULL_TREE, intTI_type_node,
				      tree_cons (NULL_TREE, intTI_type_node,
						 endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
				      tree_cons (NULL_TREE,
						 long_long_unsigned_type_node,
						 endlink)));
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;
      mode = insn_data[d->icode].operand[1].mode;

      if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
	continue;

      switch (mode)
	{
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case TImode:
	  type = ti_ftype_ti_ti;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  abort ();
	}

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
	  || d->icode == CODE_FOR_maskncmpv4sf3
	  || d->icode == CODE_FOR_vmmaskcmpv4sf3
	  || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	type = v4si_ftype_v4sf_v4sf;

      def_builtin (d->name, type, d->code);
    }

  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
  def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
  def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* Everything beyond this point is SSE only.  */
  if (! TARGET_SSE)
    return;

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
    def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);

  def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);

  def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
  def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
  def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
  def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
  def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);

  def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Composite intrinsics.  */
  def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
  def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
  def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
  def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
  def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
  def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
  def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
}
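
/* An illustrative sketch of the user-level result (using one
   era-appropriate spelling of the vector type):

     typedef int v4sf __attribute__ ((mode (V4SF)));

     v4sf
     square (float *p)
     {
       v4sf x = __builtin_ia32_loadups (p);
       return __builtin_ia32_mulps (x, x);
     }
*/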
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (x, mode)
     rtx x;
     enum machine_mode mode;
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
			      : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrti (mode == TImode ? x
			      : gen_rtx_SUBREG (TImode, x, 0)));

  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
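
/* The same shape recurs in every expander below: fetch each operand's
   mode and predicate from insn_data[icode], copy an operand to a register
   only when its predicate rejects it, and allocate a fresh target when
   the caller's suggestion has the wrong mode or fails its predicate.  */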
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (icode, arglist, shuffle)
     enum insn_code icode;
     tree arglist;
     int shuffle;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);
  if (shuffle >= 0)
    emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}
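
/* A non-negative SHUFFLE serves the composite store intrinsics below:
   storeps1 passes 0 to replicate element 0 and storerps passes 0x1B to
   reverse the four elements before the movaps-style store.  */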
/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (icode, arglist, target, do_load)
     enum insn_code icode;
     tree arglist;
     rtx target;
     int do_load;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
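
/* OP0 is passed for both input operands: the vm* patterns compute the
   operation in element 0 and merge the remaining elements from the other
   input, so using the same register gives the scalar sqrtss, rsqrtss and
   rcpss semantics where the upper elements pass through unchanged.  */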
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      target = gen_reg_rtx (tmode);
      emit_move_insn (target, op1);
      op1 = op0;
      op0 = target;
      comparison = swap_condition (comparison);
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (d, arglist, target)
     struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
      comparison = swap_condition (comparison);
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_setcc_2 (target, op2));

  return target;
}
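
/* The SImode register is zeroed before the QImode SUBREG is written by
   the setcc, so the result is a properly zero-extended 0/1 value in the
   full register.  */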
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2, arg3;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_M_FROM_INT:
      target = gen_reg_rtx (DImode);
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
      return target;

    case IX86_BUILTIN_M_TO_INT:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      op0 = copy_to_mode_reg (DImode, op0);
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
      return target;

    case IX86_BUILTIN_PEXTRW:
      icode = CODE_FOR_mmx_pextrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
      icode = CODE_FOR_mmx_pinsrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
      icode = CODE_FOR_mmx_maskmovq;
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
      icode = (fcode == IX86_BUILTIN_LOADHPS
	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS
	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_PREFETCH:
      icode = CODE_FOR_prefetch;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}

      op0 = copy_to_mode_reg (Pmode, op0);
      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return const0_rtx;

    case IX86_BUILTIN_SHUFPS:
      icode = CODE_FOR_sse_shufps;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
      icode = CODE_FOR_mmx_pshufw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[2].mode;
      mode1 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

      /* Composite intrinsics.  */
    case IX86_BUILTIN_SETPS1:
      target = assign_386_stack_local (SFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (change_address (target, SFmode, XEXP (target, 0)),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_loadss (op0, change_address (target, V4SFmode,
						      XEXP (target, 0))));
      emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
      return op0;

    case IX86_BUILTIN_SETPS:
      target = assign_386_stack_local (V4SFmode, 0);
      op0 = change_address (target, SFmode, XEXP (target, 0));
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
      emit_move_insn (op0,
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adj_offsettable_operand (op0, 4),
		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adj_offsettable_operand (op0, 8),
		      expand_expr (arg2, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adj_offsettable_operand (op0, 12),
		      expand_expr (arg3, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_movaps (op0, target));
      return op0;

    case IX86_BUILTIN_CLRPS:
      target = gen_reg_rtx (TImode);
      emit_insn (gen_sse_clrti (target));
      return target;

    case IX86_BUILTIN_LOADRPS:
      target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
					 gen_reg_rtx (V4SFmode), 1);
      emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
      return target;

    case IX86_BUILTIN_LOADPS1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
					 gen_reg_rtx (V4SFmode), 1);
      emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPS1:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
    case IX86_BUILTIN_STORERPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  abort ();
}
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */

rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  if (!reload_completed)
    abort ();
  switch (mode)
    {
    case DImode:
      {
	rtx operands[2];
	split_di (&operand, 1, operands, operands + 1);
	emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (SImode,
				      gen_rtx_PRE_DEC (Pmode,
						       stack_pointer_rtx)),
			 operands[1]));
	emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (SImode,
				      gen_rtx_PRE_DEC (Pmode,
						       stack_pointer_rtx)),
			 operands[0]));
      }
      break;
    case HImode:
      /* It is better to store HImodes as SImodes.  */
      if (!TARGET_PARTIAL_REG_STALL)
	operand = gen_lowpart (SImode, operand);
      /* FALLTHRU */
    case SImode:
      emit_insn (
	  gen_rtx_SET (VOIDmode,
		       gen_rtx_MEM (GET_MODE (operand),
				    gen_rtx_PRE_DEC (SImode,
						     stack_pointer_rtx)),
		       operand));
      break;
    default:
      abort ();
    }
  return gen_rtx_MEM (mode, stack_pointer_rtx);
}
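
/* For DImode the high word is pushed first, so the value ends up in its
   normal little-endian layout, with the low word at the final stack
   pointer.  */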
/* Free operand from the memory.  */

void
ix86_free_from_memory (mode)
     enum machine_mode mode;
{
  /* Use LEA to deallocate stack space.  In peephole2 it will be converted
     to pop or add instruction if registers are available.  */
  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					GEN_INT (mode == DImode
						 ? 8
						 : mode == HImode && TARGET_PARTIAL_REG_STALL