1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
46 #ifndef CHECK_STACK_LIMIT
47 #define CHECK_STACK_LIMIT -1
50 /* Processor costs (relative to an add) */
/* Cost table used when optimizing for size rather than speed: all
   operations get small, nearly uniform costs.  */
52 struct processor_costs size_cost
= { /* costs for tuning for size */
53 2, /* cost of an add instruction */
54 3, /* cost of a lea instruction */
55 2, /* variable shift costs */
56 3, /* constant shift costs */
57 3, /* cost of starting a multiply */
58 0, /* cost of multiply per each bit set */
59 3, /* cost of a divide/mod */
62 2, /* cost for loading QImode using movzbl */
63 {2, 2, 2}, /* cost of loading integer registers
64 in QImode, HImode and SImode.
65 Relative to reg-reg move (2). */
66 {2, 2, 2}, /* cost of storing integer registers */
67 2, /* cost of reg,reg fld/fst */
68 {2, 2, 2}, /* cost of loading fp registers
69 in SFmode, DFmode and XFmode */
70 {2, 2, 2}, /* cost of storing fp registers (SF/DF/XFmode) */
71 3, /* cost of moving MMX register */
72 {3, 3}, /* cost of loading MMX registers
73 in SImode and DImode */
74 {3, 3}, /* cost of storing MMX registers
75 in SImode and DImode */
76 3, /* cost of moving SSE register */
77 {3, 3, 3}, /* cost of loading SSE registers
78 in SImode, DImode and TImode */
79 {3, 3, 3}, /* cost of storing SSE registers
80 in SImode, DImode and TImode */
81 3, /* MMX or SSE register to integer */
83 /* Processor costs (relative to an add) */
/* Cost table for the original 80386.  */
85 struct processor_costs i386_cost
= { /* 386 specific costs */
86 1, /* cost of an add instruction */
87 1, /* cost of a lea instruction */
88 3, /* variable shift costs */
89 2, /* constant shift costs */
90 6, /* cost of starting a multiply */
91 1, /* cost of multiply per each bit set */
92 23, /* cost of a divide/mod */
93 15, /* "large" insn */
95 4, /* cost for loading QImode using movzbl */
96 {2, 4, 2}, /* cost of loading integer registers
97 in QImode, HImode and SImode.
98 Relative to reg-reg move (2). */
99 {2, 4, 2}, /* cost of storing integer registers */
100 2, /* cost of reg,reg fld/fst */
101 {8, 8, 8}, /* cost of loading fp registers
102 in SFmode, DFmode and XFmode */
103 {8, 8, 8}, /* cost of storing fp registers (SF/DF/XFmode) */
104 2, /* cost of moving MMX register */
105 {4, 8}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {4, 8}, /* cost of storing MMX registers
108 in SImode and DImode */
109 2, /* cost of moving SSE register */
110 {4, 8, 16}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {4, 8, 16}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
/* Cost table for the 80486.  */
118 struct processor_costs i486_cost
= { /* 486 specific costs */
119 1, /* cost of an add instruction */
120 1, /* cost of a lea instruction */
121 3, /* variable shift costs */
122 2, /* constant shift costs */
123 12, /* cost of starting a multiply */
124 1, /* cost of multiply per each bit set */
125 40, /* cost of a divide/mod */
126 15, /* "large" insn */
128 4, /* cost for loading QImode using movzbl */
129 {2, 4, 2}, /* cost of loading integer registers
130 in QImode, HImode and SImode.
131 Relative to reg-reg move (2). */
132 {2, 4, 2}, /* cost of storing integer registers */
133 2, /* cost of reg,reg fld/fst */
134 {8, 8, 8}, /* cost of loading fp registers
135 in SFmode, DFmode and XFmode */
136 {8, 8, 8}, /* cost of storing fp registers (SF/DF/XFmode) */
137 2, /* cost of moving MMX register */
138 {4, 8}, /* cost of loading MMX registers
139 in SImode and DImode */
140 {4, 8}, /* cost of storing MMX registers
141 in SImode and DImode */
142 2, /* cost of moving SSE register */
143 {4, 8, 16}, /* cost of loading SSE registers
144 in SImode, DImode and TImode */
145 {4, 8, 16}, /* cost of storing SSE registers
146 in SImode, DImode and TImode */
147 3 /* MMX or SSE register to integer */
/* Cost table for the Pentium (P5).  */
151 struct processor_costs pentium_cost
= {
152 1, /* cost of an add instruction */
153 1, /* cost of a lea instruction */
154 4, /* variable shift costs */
155 1, /* constant shift costs */
156 11, /* cost of starting a multiply */
157 0, /* cost of multiply per each bit set */
158 25, /* cost of a divide/mod */
159 8, /* "large" insn */
161 6, /* cost for loading QImode using movzbl */
162 {2, 4, 2}, /* cost of loading integer registers
163 in QImode, HImode and SImode.
164 Relative to reg-reg move (2). */
165 {2, 4, 2}, /* cost of storing integer registers */
166 2, /* cost of reg,reg fld/fst */
167 {2, 2, 6}, /* cost of loading fp registers
168 in SFmode, DFmode and XFmode */
169 {4, 4, 6}, /* cost of storing fp registers (SF/DF/XFmode) */
170 8, /* cost of moving MMX register */
171 {8, 8}, /* cost of loading MMX registers
172 in SImode and DImode */
173 {8, 8}, /* cost of storing MMX registers
174 in SImode and DImode */
175 2, /* cost of moving SSE register */
176 {4, 8, 16}, /* cost of loading SSE registers
177 in SImode, DImode and TImode */
178 {4, 8, 16}, /* cost of storing SSE registers
179 in SImode, DImode and TImode */
180 3 /* MMX or SSE register to integer */
/* Cost table for the Pentium Pro / P6 family.  */
184 struct processor_costs pentiumpro_cost
= {
185 1, /* cost of an add instruction */
186 1, /* cost of a lea instruction */
187 1, /* variable shift costs */
188 1, /* constant shift costs */
189 4, /* cost of starting a multiply */
190 0, /* cost of multiply per each bit set */
191 17, /* cost of a divide/mod */
192 8, /* "large" insn */
194 2, /* cost for loading QImode using movzbl */
195 {4, 4, 4}, /* cost of loading integer registers
196 in QImode, HImode and SImode.
197 Relative to reg-reg move (2). */
198 {2, 2, 2}, /* cost of storing integer registers */
199 2, /* cost of reg,reg fld/fst */
200 {2, 2, 6}, /* cost of loading fp registers
201 in SFmode, DFmode and XFmode */
202 {4, 4, 6}, /* cost of storing fp registers (SF/DF/XFmode) */
203 2, /* cost of moving MMX register */
204 {2, 2}, /* cost of loading MMX registers
205 in SImode and DImode */
206 {2, 2}, /* cost of storing MMX registers
207 in SImode and DImode */
208 2, /* cost of moving SSE register */
209 {2, 2, 8}, /* cost of loading SSE registers
210 in SImode, DImode and TImode */
211 {2, 2, 8}, /* cost of storing SSE registers
212 in SImode, DImode and TImode */
213 3 /* MMX or SSE register to integer */
/* Cost table for the AMD K6.  */
217 struct processor_costs k6_cost
= {
218 1, /* cost of an add instruction */
219 2, /* cost of a lea instruction */
220 1, /* variable shift costs */
221 1, /* constant shift costs */
222 3, /* cost of starting a multiply */
223 0, /* cost of multiply per each bit set */
224 18, /* cost of a divide/mod */
225 8, /* "large" insn */
227 3, /* cost for loading QImode using movzbl */
228 {4, 5, 4}, /* cost of loading integer registers
229 in QImode, HImode and SImode.
230 Relative to reg-reg move (2). */
231 {2, 3, 2}, /* cost of storing integer registers */
232 4, /* cost of reg,reg fld/fst */
233 {6, 6, 6}, /* cost of loading fp registers
234 in SFmode, DFmode and XFmode */
235 {4, 4, 4}, /* cost of storing fp registers (SF/DF/XFmode) */
236 2, /* cost of moving MMX register */
237 {2, 2}, /* cost of loading MMX registers
238 in SImode and DImode */
239 {2, 2}, /* cost of storing MMX registers
240 in SImode and DImode */
241 2, /* cost of moving SSE register */
242 {2, 2, 8}, /* cost of loading SSE registers
243 in SImode, DImode and TImode */
244 {2, 2, 8}, /* cost of storing SSE registers
245 in SImode, DImode and TImode */
246 6 /* MMX or SSE register to integer */
/* Cost table for the AMD Athlon.  */
250 struct processor_costs athlon_cost
= {
251 1, /* cost of an add instruction */
252 2, /* cost of a lea instruction */
253 1, /* variable shift costs */
254 1, /* constant shift costs */
255 5, /* cost of starting a multiply */
256 0, /* cost of multiply per each bit set */
257 42, /* cost of a divide/mod */
258 8, /* "large" insn */
260 4, /* cost for loading QImode using movzbl */
261 {4, 5, 4}, /* cost of loading integer registers
262 in QImode, HImode and SImode.
263 Relative to reg-reg move (2). */
264 {2, 3, 2}, /* cost of storing integer registers */
265 4, /* cost of reg,reg fld/fst */
266 {6, 6, 20}, /* cost of loading fp registers
267 in SFmode, DFmode and XFmode */
268 {4, 4, 16}, /* cost of storing fp registers (SF/DF/XFmode) */
269 2, /* cost of moving MMX register */
270 {2, 2}, /* cost of loading MMX registers
271 in SImode and DImode */
272 {2, 2}, /* cost of storing MMX registers
273 in SImode and DImode */
274 2, /* cost of moving SSE register */
275 {2, 2, 8}, /* cost of loading SSE registers
276 in SImode, DImode and TImode */
277 {2, 2, 8}, /* cost of storing SSE registers
278 in SImode, DImode and TImode */
279 6 /* MMX or SSE register to integer */
/* Cost table for the Pentium 4.  Note the very high divide cost and
   the expensive SSE register loads relative to stores (asymmetry is
   as written in the table — confirm against vendor optimization
   guides before changing).  */
283 struct processor_costs pentium4_cost
= {
284 1, /* cost of an add instruction */
285 1, /* cost of a lea instruction */
286 8, /* variable shift costs */
287 8, /* constant shift costs */
288 30, /* cost of starting a multiply */
289 0, /* cost of multiply per each bit set */
290 112, /* cost of a divide/mod */
291 16, /* "large" insn */
293 2, /* cost for loading QImode using movzbl */
294 {4, 5, 4}, /* cost of loading integer registers
295 in QImode, HImode and SImode.
296 Relative to reg-reg move (2). */
297 {2, 3, 2}, /* cost of storing integer registers */
298 2, /* cost of reg,reg fld/fst */
299 {2, 2, 6}, /* cost of loading fp registers
300 in SFmode, DFmode and XFmode */
301 {4, 4, 6}, /* cost of storing fp registers (SF/DF/XFmode) */
302 2, /* cost of moving MMX register */
303 {2, 2}, /* cost of loading MMX registers
304 in SImode and DImode */
305 {2, 2}, /* cost of storing MMX registers
306 in SImode and DImode */
307 12, /* cost of moving SSE register */
308 {12, 12, 12}, /* cost of loading SSE registers
309 in SImode, DImode and TImode */
310 {2, 2, 8}, /* cost of storing SSE registers
311 in SImode, DImode and TImode */
312 10, /* MMX or SSE register to integer */
/* Cost table currently in effect.  Reassigned in option processing to
   processor_target_table[ix86_cpu].cost (or to &size_cost when
   optimizing for size); defaults to Pentium tuning.  */
315 const struct processor_costs
*ix86_cost
= &pentium_cost
;
317 /* Processor feature/optimization bitmasks. */
318 #define m_386 (1<<PROCESSOR_I386)
319 #define m_486 (1<<PROCESSOR_I486)
320 #define m_PENT (1<<PROCESSOR_PENTIUM)
321 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
322 #define m_K6 (1<<PROCESSOR_K6)
323 #define m_ATHLON (1<<PROCESSOR_ATHLON)
324 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
/* Each x86_* constant below is a bitmask of the processor types above
   for which the named feature/optimization should be enabled.
   NOTE(review): presumably tested against the -mcpu selection via
   TARGET_* macros in i386.h — confirm.  A ~mask form means "all
   processors except ...".  */
326 const int x86_use_leave
= m_386
| m_K6
| m_ATHLON
;
327 const int x86_push_memory
= m_386
| m_K6
| m_ATHLON
| m_PENT4
;
328 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
329 const int x86_movx
= m_ATHLON
| m_PPRO
| m_PENT4
/* m_386 | m_K6 */;
330 const int x86_double_with_add
= ~m_386
;
331 const int x86_use_bit_test
= m_386
;
332 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON
| m_K6
;
333 const int x86_cmove
= m_PPRO
| m_ATHLON
| m_PENT4
;
334 const int x86_3dnow_a
= m_ATHLON
;
335 const int x86_deep_branch
= m_PPRO
| m_K6
| m_ATHLON
| m_PENT4
;
336 const int x86_branch_hints
= m_PENT4
;
337 const int x86_use_sahf
= m_PPRO
| m_K6
| m_PENT4
;
338 const int x86_partial_reg_stall
= m_PPRO
;
339 const int x86_use_loop
= m_K6
;
340 const int x86_use_fiop
= ~(m_PPRO
| m_ATHLON
| m_PENT
);
341 const int x86_use_mov0
= m_K6
;
342 const int x86_use_cltd
= ~(m_PENT
| m_K6
);
343 const int x86_read_modify_write
= ~m_PENT
;
344 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
345 const int x86_split_long_moves
= m_PPRO
;
346 const int x86_promote_QImode
= m_K6
| m_PENT
| m_386
| m_486
;
347 const int x86_single_stringop
= m_386
| m_PENT4
;
/* QImode arithmetic is fine everywhere ...  */
348 const int x86_qimode_math
= ~(0);
/* ... but promoting QI registers is never a win by default.  */
349 const int x86_promote_qi_regs
= 0;
350 const int x86_himode_math
= ~(m_PPRO
);
351 const int x86_promote_hi_regs
= m_PPRO
;
352 const int x86_sub_esp_4
= m_ATHLON
| m_PPRO
| m_PENT4
;
353 const int x86_sub_esp_8
= m_ATHLON
| m_PPRO
| m_386
| m_486
| m_PENT4
;
354 const int x86_add_esp_4
= m_ATHLON
| m_K6
| m_PENT4
;
355 const int x86_add_esp_8
= m_ATHLON
| m_PPRO
| m_K6
| m_386
| m_486
| m_PENT4
;
356 const int x86_integer_DFmode_moves
= ~(m_ATHLON
| m_PENT4
);
357 const int x86_partial_reg_dependency
= m_ATHLON
| m_PENT4
;
358 const int x86_memory_mismatch_stall
= m_ATHLON
| m_PENT4
;
359 const int x86_accumulate_outgoing_args
= m_ATHLON
| m_PENT4
| m_PPRO
;
360 const int x86_prologue_using_move
= m_ATHLON
| m_PENT4
| m_PPRO
;
361 const int x86_epilogue_using_move
= m_ATHLON
| m_PENT4
| m_PPRO
;
363 /* In case the average insn count for single function invocation is
364 lower than this constant, emit fast (but longer) prologue and epilogue.  */
366 #define FAST_PROLOGUE_INSN_COUNT 30
367 /* Set by prologue expander and used by epilogue expander to determine
   which prologue/epilogue style was chosen (see
   FAST_PROLOGUE_INSN_COUNT above).  */
369 static int use_fast_prologue_epilogue
;
/* Memory reference to the slot at the frame pointer, in MODE.  */
371 #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
/* Printable register names, indexed by hard register number; the
   initializer macros come from i386.h.  */
373 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
; /* names for 16 bit regs */
374 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
; /* names for 8 bit regs (low) */
375 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
; /* names for 8 bit regs (high) */
377 /* Array of the smallest class containing reg number REGNO, indexed by
378 REGNO. Used by REGNO_REG_CLASS in i386.h. */
380 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
/* ax, dx, cx, bx */
383 AREG
, DREG
, CREG
, BREG
,
/* si, di, then two NON_Q_REGS — presumably bp and sp (no byte
   subregisters); confirm against the hard-reg order in i386.h.  */
385 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
/* FP stack: st(0), st(1), then the rest of the x87 stack.  */
387 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
388 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
391 /* flags, fpsr, dirflag, frame */
392 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
/* SSE registers */
393 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
/* MMX registers */
395 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
/* extended (REX) integer registers r8-r15 */
397 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
398 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
/* extended SSE registers */
399 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
403 /* The "default" register map used in 32bit mode. */
/* Maps gcc hard register numbers to the debug-output (dbx) register
   numbering; -1 means "no debug register".  */
405 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
407 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
408 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
409 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
410 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
411 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
412 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
413 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* Hard registers used to pass integer arguments under the x86-64 ABI,
   in argument order (gcc hard reg numbers: 0=ax, 1=dx, 2=cx, 4=si,
   5=di).  */
416 static int x86_64_int_parameter_registers
[6] = {5 /*RDI*/, 4 /*RSI*/,
417 1 /*RDX*/, 2 /*RCX*/,
418 FIRST_REX_INT_REG
/*R8 */,
419 FIRST_REX_INT_REG
+ 1 /*R9 */};
/* Hard registers used to return integer values.  NOTE(review): reg 1
   is RDX per the parameter table above; the original comment labeled
   it RDI, which looked wrong.  */
420 static int x86_64_int_return_registers
[4] = {0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/};
422 /* The "default" register map used in 64bit mode. */
/* Maps gcc hard register numbers to the debug-output register
   numbering in 64-bit mode; -1 means "no debug register".
   Fixed: the fp-regs row was missing its trailing comma, which made
   the initializer a syntax error ("... 40 -1 ...").  */
423 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
425 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
426 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
427 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
428 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
429 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
430 8,9,10,11,12,13,14,15, /* extended integer registers */
431 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
434 /* Define the register numbers to be used in Dwarf debugging information.
435 The SVR4 reference port C compiler uses the following register numbers
436 in its Dwarf output code:
437 0 for %eax (gcc regno = 0)
438 1 for %ecx (gcc regno = 2)
439 2 for %edx (gcc regno = 1)
440 3 for %ebx (gcc regno = 3)
441 4 for %esp (gcc regno = 7)
442 5 for %ebp (gcc regno = 6)
443 6 for %esi (gcc regno = 4)
444 7 for %edi (gcc regno = 5)
445 The following three DWARF register numbers are never generated by
446 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
447 believes these numbers have these meanings.
448 8 for %eip (no gcc equivalent)
449 9 for %eflags (gcc regno = 17)
450 10 for %trapno (no gcc equivalent)
451 It is not at all clear how we should number the FP stack registers
452 for the x86 architecture. If the version of SDB on x86/svr4 were
453 a bit less brain dead with respect to floating-point then we would
454 have a precedent to follow with respect to DWARF register numbers
455 for x86 FP registers, but the SDB on x86/svr4 is so completely
456 broken with respect to FP registers that it is hardly worth thinking
457 of it as something to strive for compatibility with.
458 The version of x86/svr4 SDB I have at the moment does (partially)
459 seem to believe that DWARF register number 11 is associated with
460 the x86 register %st(0), but that's about all. Higher DWARF
461 register numbers don't seem to be associated with anything in
462 particular, and even for DWARF regno 11, SDB only seems to under-
463 stand that it should say that a variable lives in %st(0) (when
464 asked via an `=' command) if we said it was in DWARF regno 11,
465 but SDB still prints garbage when asked for the value of the
466 variable in question (via a `/' command).
467 (Also note that the labels SDB prints for various FP stack regs
468 when doing an `x' command are all wrong.)
469 Note that these problems generally don't affect the native SVR4
470 C compiler because it doesn't allow the use of -O with -g and
471 because when it is *not* optimizing, it allocates a memory
472 location for each floating-point variable, and the memory
473 location is what gets described in the DWARF AT_location
474 attribute for the variable in question.
475 Regardless of the severe mental illness of the x86/svr4 SDB, we
476 do something sensible here and we use the following DWARF
477 register numbers. Note that these are all stack-top-relative
479 11 for %st(0) (gcc regno = 8)
480 12 for %st(1) (gcc regno = 9)
481 13 for %st(2) (gcc regno = 10)
482 14 for %st(3) (gcc regno = 11)
483 15 for %st(4) (gcc regno = 12)
484 16 for %st(5) (gcc regno = 13)
485 17 for %st(6) (gcc regno = 14)
486 18 for %st(7) (gcc regno = 15) */
/* Register numbering used for SVR4 Dwarf output (see the long comment
   above for the mapping rationale).  */
488 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
490 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
491 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
492 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
493 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
494 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
495 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
496 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
499 /* Test and compare insns in i386.md store the information needed to
500 generate branch and scc insns here. */
/* Pending compare operands; consumed when the subsequent branch or
   scc pattern is expanded.  */
502 struct rtx_def
*ix86_compare_op0
= NULL_RTX
;
503 struct rtx_def
*ix86_compare_op1
= NULL_RTX
;
505 #define MAX_386_STACK_LOCALS 3
506 /* Size of the register save area. */
507 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
509 /* Define the structure for the machine field in struct function. */
510 struct machine_function
/* Scratch stack slots, one set per machine mode.  */
512 rtx stack_locals
[(int) MAX_MACHINE_MODE
][MAX_386_STACK_LOCALS
];
/* NOTE: the "varrargs" misspelling is load-bearing — the accessor
   macro below uses the same spelling; rename both or neither.  */
513 int save_varrargs_registers
;
514 int accesses_prev_frame
;
517 #define ix86_stack_locals (cfun->machine->stack_locals)
518 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
520 /* Structure describing stack frame layout.
521 Stack grows downward:
527 saved frame pointer if frame_pointer_needed
528 <- HARD_FRAME_POINTER
534 > to_allocate <- FRAME_POINTER */
546 int outgoing_arguments_size
;
/* Total bytes the prologue must allocate.  */
549 HOST_WIDE_INT to_allocate
;
550 /* The offsets relative to ARG_POINTER. */
551 HOST_WIDE_INT frame_pointer_offset
;
552 HOST_WIDE_INT hard_frame_pointer_offset
;
553 HOST_WIDE_INT stack_pointer_offset
;
556 /* Code model option as passed by user. */
557 const char *ix86_cmodel_string
;
/* Parsed code model, derived from ix86_cmodel_string during option
   processing.  */
559 enum cmodel ix86_cmodel
;
561 /* which cpu are we scheduling for */
562 enum processor_type ix86_cpu
;
564 /* which instruction set architecture to use. */
567 /* Strings to hold which cpu and instruction set architecture to use. */
568 const char *ix86_cpu_string
; /* for -mcpu=<xxx> */
569 const char *ix86_arch_string
; /* for -march=<xxx> */
571 /* # of registers to use to pass arguments. */
572 const char *ix86_regparm_string
;
574 /* ix86_regparm_string as a number */
577 /* Alignment to use for loops and jumps: */
579 /* Power of two alignment for loops. */
580 const char *ix86_align_loops_string
;
582 /* Power of two alignment for non-loop jumps. */
583 const char *ix86_align_jumps_string
;
585 /* Power of two alignment for stack boundary in bytes. */
586 const char *ix86_preferred_stack_boundary_string
;
588 /* Preferred alignment for stack boundary in bits. */
589 int ix86_preferred_stack_boundary
;
591 /* Values 1-5: see jump.c */
592 int ix86_branch_cost
;
593 const char *ix86_branch_cost_string
;
595 /* Power of two alignment for functions. */
596 const char *ix86_align_funcs_string
;
598 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
599 static char internal_label_prefix
[16];
600 static int internal_label_prefix_len
;
602 static int local_symbolic_operand
PARAMS ((rtx
, enum machine_mode
));
603 static void output_pic_addr_const
PARAMS ((FILE *, rtx
, int));
604 static void put_condition_code
PARAMS ((enum rtx_code
, enum machine_mode
,
606 static rtx ix86_expand_int_compare
PARAMS ((enum rtx_code
, rtx
, rtx
));
607 static enum rtx_code ix86_prepare_fp_compare_args
PARAMS ((enum rtx_code
,
609 static rtx gen_push
PARAMS ((rtx
));
610 static int memory_address_length
PARAMS ((rtx addr
));
611 static int ix86_flags_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
612 static int ix86_agi_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
613 static int ix86_safe_length
PARAMS ((rtx
));
614 static enum attr_memory ix86_safe_memory
PARAMS ((rtx
));
615 static enum attr_pent_pair ix86_safe_pent_pair
PARAMS ((rtx
));
616 static enum attr_ppro_uops ix86_safe_ppro_uops
PARAMS ((rtx
));
617 static void ix86_dump_ppro_packet
PARAMS ((FILE *));
618 static void ix86_reorder_insn
PARAMS ((rtx
*, rtx
*));
619 static rtx
* ix86_pent_find_pair
PARAMS ((rtx
*, rtx
*, enum attr_pent_pair
,
621 static void ix86_init_machine_status
PARAMS ((struct function
*));
622 static void ix86_mark_machine_status
PARAMS ((struct function
*));
623 static void ix86_free_machine_status
PARAMS ((struct function
*));
624 static int ix86_split_to_parts
PARAMS ((rtx
, rtx
*, enum machine_mode
));
625 static int ix86_safe_length_prefix
PARAMS ((rtx
));
626 static int ix86_nsaved_regs
PARAMS((void));
627 static void ix86_emit_save_regs
PARAMS((void));
628 static void ix86_emit_save_regs_using_mov
PARAMS ((rtx
, HOST_WIDE_INT
));
629 static void ix86_emit_restore_regs_using_mov
PARAMS ((rtx
, int, int));
630 static void ix86_set_move_mem_attrs_1
PARAMS ((rtx
, rtx
, rtx
, rtx
, rtx
));
631 static void ix86_sched_reorder_pentium
PARAMS((rtx
*, rtx
*));
632 static void ix86_sched_reorder_ppro
PARAMS((rtx
*, rtx
*));
633 static HOST_WIDE_INT ix86_GOT_alias_set
PARAMS ((void));
634 static void ix86_adjust_counter
PARAMS ((rtx
, HOST_WIDE_INT
));
635 static rtx ix86_expand_aligntest
PARAMS ((rtx
, int));
636 static void ix86_expand_strlensi_unroll_1
PARAMS ((rtx
, rtx
));
637 static int ix86_issue_rate
PARAMS ((void));
638 static int ix86_adjust_cost
PARAMS ((rtx
, rtx
, rtx
, int));
639 static void ix86_sched_init
PARAMS ((FILE *, int, int));
640 static int ix86_sched_reorder
PARAMS ((FILE *, int, rtx
*, int *, int));
641 static int ix86_variable_issue
PARAMS ((FILE *, int, rtx
, int));
645 rtx base
, index
, disp
;
649 static int ix86_decompose_address
PARAMS ((rtx
, struct ix86_address
*));
651 struct builtin_description
;
652 static rtx ix86_expand_sse_comi
PARAMS ((const struct builtin_description
*,
654 static rtx ix86_expand_sse_compare
PARAMS ((const struct builtin_description
*,
656 static rtx ix86_expand_unop1_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
657 static rtx ix86_expand_unop_builtin
PARAMS ((enum insn_code
, tree
, rtx
, int));
658 static rtx ix86_expand_binop_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
659 static rtx ix86_expand_store_builtin
PARAMS ((enum insn_code
, tree
, int));
660 static rtx safe_vector_operand
PARAMS ((rtx
, enum machine_mode
));
661 static enum rtx_code ix86_fp_compare_code_to_integer
PARAMS ((enum rtx_code
));
662 static void ix86_fp_comparison_codes
PARAMS ((enum rtx_code code
,
666 static rtx ix86_expand_fp_compare
PARAMS ((enum rtx_code
, rtx
, rtx
, rtx
,
668 static int ix86_fp_comparison_arithmetics_cost
PARAMS ((enum rtx_code code
));
669 static int ix86_fp_comparison_fcomi_cost
PARAMS ((enum rtx_code code
));
670 static int ix86_fp_comparison_sahf_cost
PARAMS ((enum rtx_code code
));
671 static int ix86_fp_comparison_cost
PARAMS ((enum rtx_code code
));
672 static int ix86_save_reg
PARAMS ((int, int));
673 static void ix86_compute_frame_layout
PARAMS ((struct ix86_frame
*));
674 static int ix86_comp_type_attributes
PARAMS ((tree
, tree
));
675 const struct attribute_spec ix86_attribute_table
[];
676 static tree ix86_handle_cdecl_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
677 static tree ix86_handle_regparm_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
679 #ifdef DO_GLOBAL_CTORS_BODY
680 static void ix86_svr3_asm_out_constructor
PARAMS ((rtx
, int));
682 #if defined(TARGET_ELF) && defined(TARGET_COFF)
683 static void sco_asm_named_section
PARAMS ((const char *, unsigned int));
684 static void sco_asm_out_constructor
PARAMS ((rtx
, int));
686 /* Register class used for passing given 64bit part of the argument.
687 These represent classes as documented by the PS ABI, with the exception
688 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
689 use SF or DFmode move instead of DImode to avoid reformatting penalties.
691 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
692 whenever possible (upper half does contain padding).  */
694 enum x86_64_reg_class
697 X86_64_INTEGER_CLASS
,
698 X86_64_INTEGERSI_CLASS
,
707 const char * const x86_64_reg_class_name
[] =
708 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
710 #define MAX_CLASSES 4
711 static int classify_argument
PARAMS ((enum machine_mode
, tree
,
712 enum x86_64_reg_class
[MAX_CLASSES
],
714 static int examine_argument
PARAMS ((enum machine_mode
, tree
, int, int *,
716 static rtx construct_container
PARAMS ((enum machine_mode
, tree
, int, int, int,
718 static enum x86_64_reg_class merge_classes
PARAMS ((enum x86_64_reg_class
,
719 enum x86_64_reg_class
));
721 /* Initialize the GCC target structure. */
722 #undef TARGET_ATTRIBUTE_TABLE
723 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
724 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
725 # undef TARGET_MERGE_DECL_ATTRIBUTES
726 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
729 #undef TARGET_COMP_TYPE_ATTRIBUTES
730 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
732 #undef TARGET_INIT_BUILTINS
733 #define TARGET_INIT_BUILTINS ix86_init_builtins
735 #undef TARGET_EXPAND_BUILTIN
736 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
738 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
739 static void ix86_osf_output_function_prologue
PARAMS ((FILE *,
741 # undef TARGET_ASM_FUNCTION_PROLOGUE
742 # define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
745 #undef TARGET_ASM_OPEN_PAREN
746 #define TARGET_ASM_OPEN_PAREN ""
747 #undef TARGET_ASM_CLOSE_PAREN
748 #define TARGET_ASM_CLOSE_PAREN ""
750 #undef TARGET_SCHED_ADJUST_COST
751 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
752 #undef TARGET_SCHED_ISSUE_RATE
753 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
754 #undef TARGET_SCHED_VARIABLE_ISSUE
755 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
756 #undef TARGET_SCHED_INIT
757 #define TARGET_SCHED_INIT ix86_sched_init
758 #undef TARGET_SCHED_REORDER
759 #define TARGET_SCHED_REORDER ix86_sched_reorder
/* The target hook vector; TARGET_INITIALIZER picks up all the
   TARGET_* macro overrides defined above (via target-def.h).  */
761 struct gcc_target targetm
= TARGET_INITIALIZER
;
763 /* Sometimes certain combinations of command options do not make
764 sense on a particular target machine. You can define a macro
765 `OVERRIDE_OPTIONS' to take account of this. This macro, if
766 defined, is executed once just after all the command options have
769 Don't use this macro to turn on various extra optimizations for
770 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
776 /* Comes from final.c -- no real reason to change it. */
777 #define MAX_CODE_ALIGN 16
781 const struct processor_costs
*cost
; /* Processor costs */
782 const int target_enable
; /* Target flags to enable. */
783 const int target_disable
; /* Target flags to disable. */
784 const int align_loop
; /* Default alignments. */
785 const int align_jump
;
786 const int align_func
;
787 const int branch_cost
;
789 const processor_target_table
[PROCESSOR_max
] =
791 {&i386_cost
, 0, 0, 2, 2, 2, 1},
792 {&i486_cost
, 0, 0, 4, 4, 4, 1},
793 {&pentium_cost
, 0, 0, -4, -4, -4, 1},
794 {&pentiumpro_cost
, 0, 0, 4, -4, 4, 1},
795 {&k6_cost
, 0, 0, -5, -5, 4, 1},
796 {&athlon_cost
, 0, 0, 4, -4, 4, 1},
797 {&pentium4_cost
, 0, 0, 2, 2, 2, 1}
802 const char *const name
; /* processor name or nickname. */
803 const enum processor_type processor
;
805 const processor_alias_table
[] =
807 {"i386", PROCESSOR_I386
},
808 {"i486", PROCESSOR_I486
},
809 {"i586", PROCESSOR_PENTIUM
},
810 {"pentium", PROCESSOR_PENTIUM
},
811 {"i686", PROCESSOR_PENTIUMPRO
},
812 {"pentiumpro", PROCESSOR_PENTIUMPRO
},
813 {"k6", PROCESSOR_K6
},
814 {"athlon", PROCESSOR_ATHLON
},
815 {"pentium4", PROCESSOR_PENTIUM4
},
818 int const pta_size
= sizeof (processor_alias_table
) / sizeof (struct pta
);
820 #ifdef SUBTARGET_OVERRIDE_OPTIONS
821 SUBTARGET_OVERRIDE_OPTIONS
;
824 ix86_arch
= PROCESSOR_I386
;
825 ix86_cpu
= (enum processor_type
) TARGET_CPU_DEFAULT
;
827 if (ix86_cmodel_string
!= 0)
829 if (!strcmp (ix86_cmodel_string
, "small"))
830 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
832 sorry ("Code model %s not supported in PIC mode", ix86_cmodel_string
);
833 else if (!strcmp (ix86_cmodel_string
, "32"))
835 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
836 ix86_cmodel
= CM_KERNEL
;
837 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
838 ix86_cmodel
= CM_MEDIUM
;
839 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
840 ix86_cmodel
= CM_LARGE
;
842 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
848 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
850 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
851 error ("Code model `%s' not supported in the %s bit mode.",
852 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
853 if (ix86_cmodel
== CM_LARGE
)
854 sorry ("Code model `large' not supported yet.");
855 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
856 sorry ("%i-bit mode not compiled in.",
857 (target_flags
& MASK_64BIT
) ? 64 : 32);
859 if (ix86_arch_string
!= 0)
861 for (i
= 0; i
< pta_size
; i
++)
862 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
864 ix86_arch
= processor_alias_table
[i
].processor
;
865 /* Default cpu tuning to the architecture. */
866 ix86_cpu
= ix86_arch
;
871 error ("bad value (%s) for -march= switch", ix86_arch_string
);
874 if (ix86_cpu_string
!= 0)
876 for (i
= 0; i
< pta_size
; i
++)
877 if (! strcmp (ix86_cpu_string
, processor_alias_table
[i
].name
))
879 ix86_cpu
= processor_alias_table
[i
].processor
;
883 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string
);
887 ix86_cost
= &size_cost
;
889 ix86_cost
= processor_target_table
[ix86_cpu
].cost
;
890 target_flags
|= processor_target_table
[ix86_cpu
].target_enable
;
891 target_flags
&= ~processor_target_table
[ix86_cpu
].target_disable
;
893 /* Arrange to set up i386_stack_locals for all functions. */
894 init_machine_status
= ix86_init_machine_status
;
895 mark_machine_status
= ix86_mark_machine_status
;
896 free_machine_status
= ix86_free_machine_status
;
898 /* Validate -mregparm= value. */
899 if (ix86_regparm_string
)
901 i
= atoi (ix86_regparm_string
);
902 if (i
< 0 || i
> REGPARM_MAX
)
903 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
909 ix86_regparm
= REGPARM_MAX
;
911 /* If the user has provided any of the -malign-* options,
912 warn and use that value only if -falign-* is not set.
913 Remove this code in GCC 3.2 or later. */
914 if (ix86_align_loops_string
)
916 warning ("-malign-loops is obsolete, use -falign-loops");
917 if (align_loops
== 0)
919 i
= atoi (ix86_align_loops_string
);
920 if (i
< 0 || i
> MAX_CODE_ALIGN
)
921 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
923 align_loops
= 1 << i
;
927 if (ix86_align_jumps_string
)
929 warning ("-malign-jumps is obsolete, use -falign-jumps");
930 if (align_jumps
== 0)
932 i
= atoi (ix86_align_jumps_string
);
933 if (i
< 0 || i
> MAX_CODE_ALIGN
)
934 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
936 align_jumps
= 1 << i
;
940 if (ix86_align_funcs_string
)
942 warning ("-malign-functions is obsolete, use -falign-functions");
943 if (align_functions
== 0)
945 i
= atoi (ix86_align_funcs_string
);
946 if (i
< 0 || i
> MAX_CODE_ALIGN
)
947 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
949 align_functions
= 1 << i
;
953 /* Default align_* from the processor table. */
954 #define abs(n) (n < 0 ? -n : n)
955 if (align_loops
== 0)
956 align_loops
= 1 << abs (processor_target_table
[ix86_cpu
].align_loop
);
957 if (align_jumps
== 0)
958 align_jumps
= 1 << abs (processor_target_table
[ix86_cpu
].align_jump
);
959 if (align_functions
== 0)
960 align_functions
= 1 << abs (processor_target_table
[ix86_cpu
].align_func
);
962 /* Validate -mpreferred-stack-boundary= value, or provide default.
963 The default of 128 bits is for Pentium III's SSE __m128. */
964 ix86_preferred_stack_boundary
= 128;
965 if (ix86_preferred_stack_boundary_string
)
967 i
= atoi (ix86_preferred_stack_boundary_string
);
968 if (i
< (TARGET_64BIT
? 3 : 2) || i
> 31)
969 error ("-mpreferred-stack-boundary=%d is not between %d and 31", i
,
970 TARGET_64BIT
? 3 : 2);
972 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
975 /* Validate -mbranch-cost= value, or provide default. */
976 ix86_branch_cost
= processor_target_table
[ix86_cpu
].branch_cost
;
977 if (ix86_branch_cost_string
)
979 i
= atoi (ix86_branch_cost_string
);
981 error ("-mbranch-cost=%d is not between 0 and 5", i
);
983 ix86_branch_cost
= i
;
986 /* Keep nonleaf frame pointers. */
987 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
988 flag_omit_frame_pointer
= 1;
990 /* If we're doing fast math, we don't care about comparison order
991 wrt NaNs. This lets us use a shorter comparison sequence. */
992 if (flag_unsafe_math_optimizations
)
993 target_flags
&= ~MASK_IEEE_FP
;
997 if (TARGET_ALIGN_DOUBLE
)
998 error ("-malign-double makes no sense in the 64bit mode.");
1000 error ("-mrtd calling convention not supported in the 64bit mode.");
1001 /* Enable by default the SSE and MMX builtins. */
1002 target_flags
|= MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
;
1005 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1008 target_flags
|= MASK_MMX
;
1010 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1013 target_flags
|= MASK_MMX
;
1014 /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1015 extensions it adds. */
1016 if (x86_3dnow_a
& (1 << ix86_arch
))
1017 target_flags
|= MASK_3DNOW_A
;
1019 if ((x86_accumulate_outgoing_args
& CPUMASK
)
1020 && !(target_flags
& MASK_NO_ACCUMULATE_OUTGOING_ARGS
)
1022 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1024 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1027 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1028 p
= strchr (internal_label_prefix
, 'X');
1029 internal_label_prefix_len
= p
- internal_label_prefix
;
1035 optimization_options (level
, size
)
1037 int size ATTRIBUTE_UNUSED
;
1039 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1040 make the problem with not enough registers even worse. */
1041 #ifdef INSN_SCHEDULING
1043 flag_schedule_insns
= 0;
1045 if (TARGET_64BIT
&& optimize
>= 1)
1046 flag_omit_frame_pointer
= 1;
1048 flag_pcc_struct_return
= 0;
1051 /* Table of valid machine attributes. */
1052 const struct attribute_spec ix86_attribute_table
[] =
1054 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1055 /* Stdcall attribute says callee is responsible for popping arguments
1056 if they are not variable. */
1057 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1058 /* Cdecl attribute says the callee is a normal C declaration */
1059 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1060 /* Regparm attribute specifies how many integer arguments are to be
1061 passed in registers. */
1062 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute
},
1063 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1064 { "dllimport", 1, 1, false, false, false, ix86_handle_dll_attribute
},
1065 { "dllexport", 1, 1, false, false, false, ix86_handle_dll_attribute
},
1066 { "shared", 1, 1, true, false, false, ix86_handle_shared_attribute
},
1068 { NULL
, 0, 0, false, false, false, NULL
}
1071 /* Handle a "cdecl" or "stdcall" attribute;
1072 arguments as in struct attribute_spec.handler. */
1074 ix86_handle_cdecl_attribute (node
, name
, args
, flags
, no_add_attrs
)
1077 tree args ATTRIBUTE_UNUSED
;
1078 int flags ATTRIBUTE_UNUSED
;
1081 if (TREE_CODE (*node
) != FUNCTION_TYPE
1082 && TREE_CODE (*node
) != METHOD_TYPE
1083 && TREE_CODE (*node
) != FIELD_DECL
1084 && TREE_CODE (*node
) != TYPE_DECL
)
1086 warning ("`%s' attribute only applies to functions",
1087 IDENTIFIER_POINTER (name
));
1088 *no_add_attrs
= true;
1093 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
1094 *no_add_attrs
= true;
1100 /* Handle a "regparm" attribute;
1101 arguments as in struct attribute_spec.handler. */
1103 ix86_handle_regparm_attribute (node
, name
, args
, flags
, no_add_attrs
)
1107 int flags ATTRIBUTE_UNUSED
;
1110 if (TREE_CODE (*node
) != FUNCTION_TYPE
1111 && TREE_CODE (*node
) != METHOD_TYPE
1112 && TREE_CODE (*node
) != FIELD_DECL
1113 && TREE_CODE (*node
) != TYPE_DECL
)
1115 warning ("`%s' attribute only applies to functions",
1116 IDENTIFIER_POINTER (name
));
1117 *no_add_attrs
= true;
1123 cst
= TREE_VALUE (args
);
1124 if (TREE_CODE (cst
) != INTEGER_CST
)
1126 warning ("`%s' attribute requires an integer constant argument",
1127 IDENTIFIER_POINTER (name
));
1128 *no_add_attrs
= true;
1130 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
1132 warning ("argument to `%s' attribute larger than %d",
1133 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
1134 *no_add_attrs
= true;
1141 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
1143 /* Generate the assembly code for function entry. FILE is a stdio
1144 stream to output the code to. SIZE is an int: how many units of
1145 temporary storage to allocate.
1147 Refer to the array `regs_ever_live' to determine which registers to
1148 save; `regs_ever_live[I]' is nonzero if register number I is ever
1149 used in the function. This function is responsible for knowing
1150 which registers should not be saved even if used.
1152 We override it here to allow for the new profiling code to go before
1153 the prologue and the old mcount code to go after the prologue (and
1154 after %ebx has been set up for ELF shared library support). */
1157 ix86_osf_output_function_prologue (file
, size
)
1162 char *lprefix
= LPREFIX
;
1163 int labelno
= profile_label_no
;
1167 if (TARGET_UNDERSCORES
)
1170 if (profile_flag
&& OSF_PROFILE_BEFORE_PROLOGUE
)
1172 if (!flag_pic
&& !HALF_PIC_P ())
1174 fprintf (file
, "\tmovl $%sP%d,%%edx\n", lprefix
, labelno
);
1175 fprintf (file
, "\tcall *%s_mcount_ptr\n", prefix
);
1178 else if (HALF_PIC_P ())
1182 HALF_PIC_EXTERNAL ("_mcount_ptr");
1183 symref
= HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode
,
1186 fprintf (file
, "\tmovl $%sP%d,%%edx\n", lprefix
, labelno
);
1187 fprintf (file
, "\tmovl %s%s,%%eax\n", prefix
,
1189 fprintf (file
, "\tcall *(%%eax)\n");
1194 static int call_no
= 0;
1196 fprintf (file
, "\tcall %sPc%d\n", lprefix
, call_no
);
1197 fprintf (file
, "%sPc%d:\tpopl %%eax\n", lprefix
, call_no
);
1198 fprintf (file
, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1199 lprefix
, call_no
++);
1200 fprintf (file
, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1202 fprintf (file
, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1204 fprintf (file
, "\tcall *(%%eax)\n");
1210 if (profile_flag
&& OSF_PROFILE_BEFORE_PROLOGUE
)
1214 fprintf (file
, "\tmovl $%sP%d,%%edx\n", lprefix
, labelno
);
1215 fprintf (file
, "\tcall *%s_mcount_ptr\n", prefix
);
1220 static int call_no
= 0;
1222 fprintf (file
, "\tcall %sPc%d\n", lprefix
, call_no
);
1223 fprintf (file
, "%sPc%d:\tpopl %%eax\n", lprefix
, call_no
);
1224 fprintf (file
, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1225 lprefix
, call_no
++);
1226 fprintf (file
, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1228 fprintf (file
, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1230 fprintf (file
, "\tcall *(%%eax)\n");
1233 #endif /* !OSF_OS */
1235 function_prologue (file
, size
);
1238 #endif /* OSF_OS || TARGET_OSF1ELF */
1240 /* Return 0 if the attributes for two types are incompatible, 1 if they
1241 are compatible, and 2 if they are nearly compatible (which causes a
1242 warning to be generated). */
1245 ix86_comp_type_attributes (type1
, type2
)
1249 /* Check for mismatch of non-default calling convention. */
1250 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
1252 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
1255 /* Check for mismatched return types (cdecl vs stdcall). */
1256 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
1257 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
1262 /* Value is the number of bytes of arguments automatically
1263 popped when returning from a subroutine call.
1264 FUNDECL is the declaration node of the function (as a tree),
1265 FUNTYPE is the data type of the function (as a tree),
1266 or for a library call it is an identifier node for the subroutine name.
1267 SIZE is the number of bytes of arguments passed on the stack.
1269 On the 80386, the RTD insn may be used to pop them if the number
1270 of args is fixed, but if the number is variable then the caller
1271 must pop them all. RTD can't be used for library calls now
1272 because the library is compiled with the Unix compiler.
1273 Use of RTD is a selectable option, since it is incompatible with
1274 standard Unix calling sequences. If the option is not selected,
1275 the caller must always pop the args.
1277 The attribute stdcall is equivalent to RTD on a per module basis. */
1280 ix86_return_pops_args (fundecl
, funtype
, size
)
1285 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
1287 /* Cdecl functions override -mrtd, and never pop the stack. */
1288 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
1290 /* Stdcall functions will pop the stack if not variable args. */
1291 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
)))
1295 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1296 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
1297 == void_type_node
)))
1301 /* Lose any fake structure return argument. */
1302 if (aggregate_value_p (TREE_TYPE (funtype
))
1304 return GET_MODE_SIZE (Pmode
);
1309 /* Argument support functions. */
1311 /* Return true when register may be used to pass function parameters. */
1313 ix86_function_arg_regno_p (regno
)
1318 return regno
< REGPARM_MAX
|| (TARGET_SSE
&& SSE_REGNO_P (regno
));
1319 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
1321 /* RAX is used as hidden argument to va_arg functions. */
1324 for (i
= 0; i
< REGPARM_MAX
; i
++)
1325 if (regno
== x86_64_int_parameter_registers
[i
])
1330 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1331 for a call to a function whose data type is FNTYPE.
1332 For a library call, FNTYPE is 0. */
1335 init_cumulative_args (cum
, fntype
, libname
)
1336 CUMULATIVE_ARGS
*cum
; /* Argument info to initialize */
1337 tree fntype
; /* tree ptr for function decl */
1338 rtx libname
; /* SYMBOL_REF of library name or 0 */
1340 static CUMULATIVE_ARGS zero_cum
;
1341 tree param
, next_param
;
1343 if (TARGET_DEBUG_ARG
)
1345 fprintf (stderr
, "\ninit_cumulative_args (");
1347 fprintf (stderr
, "fntype code = %s, ret code = %s",
1348 tree_code_name
[(int) TREE_CODE (fntype
)],
1349 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
1351 fprintf (stderr
, "no fntype");
1354 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
1359 /* Set up the number of registers to use for passing arguments. */
1360 cum
->nregs
= ix86_regparm
;
1361 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1362 if (fntype
&& !TARGET_64BIT
)
1364 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype
));
1367 cum
->nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1369 cum
->maybe_vaarg
= false;
1371 /* Determine if this function has variable arguments. This is
1372 indicated by the last argument being 'void_type_mode' if there
1373 are no variable arguments. If there are variable arguments, then
1374 we won't pass anything in registers */
1378 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
1379 param
!= 0; param
= next_param
)
1381 next_param
= TREE_CHAIN (param
);
1382 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
1386 cum
->maybe_vaarg
= true;
1390 if ((!fntype
&& !libname
)
1391 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
1392 cum
->maybe_vaarg
= 1;
1394 if (TARGET_DEBUG_ARG
)
1395 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
1400 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
1401 of this code is to classify each 8 bytes of the incoming argument by register
1402 class and assign registers accordingly. */
1404 /* Return the union class of CLASS1 and CLASS2.
1405 See the x86-64 PS ABI for details. */
1407 static enum x86_64_reg_class
1408 merge_classes (class1
, class2
)
1409 enum x86_64_reg_class class1
, class2
;
1411 /* Rule #1: If both classes are equal, this is the resulting class. */
1412 if (class1
== class2
)
1415 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1417 if (class1
== X86_64_NO_CLASS
)
1419 if (class2
== X86_64_NO_CLASS
)
1422 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1423 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
1424 return X86_64_MEMORY_CLASS
;
1426 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1427 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
1428 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
1429 return X86_64_INTEGERSI_CLASS
;
1430 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
1431 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
1432 return X86_64_INTEGER_CLASS
;
1434 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1435 if (class1
== X86_64_X87_CLASS
|| class1
== X86_64_X87UP_CLASS
1436 || class2
== X86_64_X87_CLASS
|| class2
== X86_64_X87UP_CLASS
)
1437 return X86_64_MEMORY_CLASS
;
1439 /* Rule #6: Otherwise class SSE is used. */
1440 return X86_64_SSE_CLASS
;
1443 /* Classify the argument of type TYPE and mode MODE.
1444 CLASSES will be filled by the register class used to pass each word
1445 of the operand. The number of words is returned. In case the parameter
1446 should be passed in memory, 0 is returned. As a special case for zero
1447 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1449 BIT_OFFSET is used internally for handling records and specifies offset
1450 of the offset in bits modulo 256 to avoid overflow cases.
1452 See the x86-64 PS ABI for details.
1456 classify_argument (mode
, type
, classes
, bit_offset
)
1457 enum machine_mode mode
;
1459 enum x86_64_reg_class classes
[MAX_CLASSES
];
1463 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1464 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1466 if (type
&& AGGREGATE_TYPE_P (type
))
1470 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
1472 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1476 for (i
= 0; i
< words
; i
++)
1477 classes
[i
] = X86_64_NO_CLASS
;
1479 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1480 signalize memory class, so handle it as special case. */
1483 classes
[0] = X86_64_NO_CLASS
;
1487 /* Classify each field of record and merge classes. */
1488 if (TREE_CODE (type
) == RECORD_TYPE
)
1490 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1492 if (TREE_CODE (field
) == FIELD_DECL
)
1496 /* Bitfields are always classified as integer. Handle them
1497 early, since later code would consider them to be
1498 misaligned integers. */
1499 if (DECL_BIT_FIELD (field
))
1501 for (i
= int_bit_position (field
) / 8 / 8;
1502 i
< (int_bit_position (field
)
1503 + tree_low_cst (DECL_SIZE (field
), 0)
1506 merge_classes (X86_64_INTEGER_CLASS
,
1511 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1512 TREE_TYPE (field
), subclasses
,
1513 (int_bit_position (field
)
1514 + bit_offset
) % 256);
1517 for (i
= 0; i
< num
; i
++)
1520 (int_bit_position (field
) + bit_offset
) / 8 / 8;
1522 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1528 /* Arrays are handled as small records. */
1529 else if (TREE_CODE (type
) == ARRAY_TYPE
)
1532 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
1533 TREE_TYPE (type
), subclasses
, bit_offset
);
1537 /* The partial classes are now full classes. */
1538 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
1539 subclasses
[0] = X86_64_SSE_CLASS
;
1540 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
1541 subclasses
[0] = X86_64_INTEGER_CLASS
;
1543 for (i
= 0; i
< words
; i
++)
1544 classes
[i
] = subclasses
[i
% num
];
1546 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1547 else if (TREE_CODE (type
) == UNION_TYPE
)
1549 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1551 if (TREE_CODE (field
) == FIELD_DECL
)
1554 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1555 TREE_TYPE (field
), subclasses
,
1559 for (i
= 0; i
< num
; i
++)
1560 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
1567 /* Final merger cleanup. */
1568 for (i
= 0; i
< words
; i
++)
1570 /* If one class is MEMORY, everything should be passed in
1572 if (classes
[i
] == X86_64_MEMORY_CLASS
)
1575 /* The X86_64_SSEUP_CLASS should always be preceded by
1576 X86_64_SSE_CLASS. */
1577 if (classes
[i
] == X86_64_SSEUP_CLASS
1578 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
1579 classes
[i
] = X86_64_SSE_CLASS
;
1581 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1582 if (classes
[i
] == X86_64_X87UP_CLASS
1583 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
1584 classes
[i
] = X86_64_SSE_CLASS
;
1589 /* Compute alignment needed. We align all types to natural boundaries with
1590 exception of XFmode that is aligned to 64bits. */
1591 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
1593 int mode_alignment
= GET_MODE_BITSIZE (mode
);
1596 mode_alignment
= 128;
1597 else if (mode
== XCmode
)
1598 mode_alignment
= 256;
1599 /* Misaligned fields are always returned in memory. */
1600 if (bit_offset
% mode_alignment
)
1604 /* Classification of atomic types. */
1614 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
1615 classes
[0] = X86_64_INTEGERSI_CLASS
;
1617 classes
[0] = X86_64_INTEGER_CLASS
;
1621 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1624 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1625 classes
[2] = classes
[3] = X86_64_INTEGER_CLASS
;
1628 if (!(bit_offset
% 64))
1629 classes
[0] = X86_64_SSESF_CLASS
;
1631 classes
[0] = X86_64_SSE_CLASS
;
1634 classes
[0] = X86_64_SSEDF_CLASS
;
1637 classes
[0] = X86_64_X87_CLASS
;
1638 classes
[1] = X86_64_X87UP_CLASS
;
1641 classes
[0] = X86_64_X87_CLASS
;
1642 classes
[1] = X86_64_X87UP_CLASS
;
1643 classes
[2] = X86_64_X87_CLASS
;
1644 classes
[3] = X86_64_X87UP_CLASS
;
1647 classes
[0] = X86_64_SSEDF_CLASS
;
1648 classes
[1] = X86_64_SSEDF_CLASS
;
1651 classes
[0] = X86_64_SSE_CLASS
;
1660 /* Examine the argument and return set number of register required in each
1661 class. Return 0 ifif parameter should be passed in memory. */
1663 examine_argument (mode
, type
, in_return
, int_nregs
, sse_nregs
)
1664 enum machine_mode mode
;
1666 int *int_nregs
, *sse_nregs
;
1669 enum x86_64_reg_class
class[MAX_CLASSES
];
1670 int n
= classify_argument (mode
, type
, class, 0);
1676 for (n
--; n
>= 0; n
--)
1679 case X86_64_INTEGER_CLASS
:
1680 case X86_64_INTEGERSI_CLASS
:
1683 case X86_64_SSE_CLASS
:
1684 case X86_64_SSESF_CLASS
:
1685 case X86_64_SSEDF_CLASS
:
1688 case X86_64_NO_CLASS
:
1689 case X86_64_SSEUP_CLASS
:
1691 case X86_64_X87_CLASS
:
1692 case X86_64_X87UP_CLASS
:
1696 case X86_64_MEMORY_CLASS
:
1701 /* Construct container for the argument used by GCC interface. See
1702 FUNCTION_ARG for the detailed description. */
1704 construct_container (mode
, type
, in_return
, nintregs
, nsseregs
, intreg
, sse_regno
)
1705 enum machine_mode mode
;
1708 int nintregs
, nsseregs
;
1709 int *intreg
, sse_regno
;
1711 enum machine_mode tmpmode
;
1713 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1714 enum x86_64_reg_class
class[MAX_CLASSES
];
1718 int needed_sseregs
, needed_intregs
;
1719 rtx exp
[MAX_CLASSES
];
1722 n
= classify_argument (mode
, type
, class, 0);
1723 if (TARGET_DEBUG_ARG
)
1726 fprintf (stderr
, "Memory class\n");
1729 fprintf (stderr
, "Classes:");
1730 for (i
= 0; i
< n
; i
++)
1732 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
1734 fprintf (stderr
, "\n");
1739 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
, &needed_sseregs
))
1741 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
1744 /* First construct simple cases. Avoid SCmode, since we want to use
1745 single register to pass this type. */
1746 if (n
== 1 && mode
!= SCmode
)
1749 case X86_64_INTEGER_CLASS
:
1750 case X86_64_INTEGERSI_CLASS
:
1751 return gen_rtx_REG (mode
, intreg
[0]);
1752 case X86_64_SSE_CLASS
:
1753 case X86_64_SSESF_CLASS
:
1754 case X86_64_SSEDF_CLASS
:
1755 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
1756 case X86_64_X87_CLASS
:
1757 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
1758 case X86_64_NO_CLASS
:
1759 /* Zero sized array, struct or class. */
1764 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
)
1765 return gen_rtx_REG (TImode
, SSE_REGNO (sse_regno
));
1767 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
1768 return gen_rtx_REG (TFmode
, FIRST_STACK_REG
);
1769 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
1770 && class[1] == X86_64_INTEGER_CLASS
1771 && (mode
== CDImode
|| mode
== TImode
)
1772 && intreg
[0] + 1 == intreg
[1])
1773 return gen_rtx_REG (mode
, intreg
[0]);
1775 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
1776 && class[2] == X86_64_X87_CLASS
&& class[3] == X86_64_X87UP_CLASS
)
1777 return gen_rtx_REG (TCmode
, FIRST_STACK_REG
);
1779 /* Otherwise figure out the entries of the PARALLEL. */
1780 for (i
= 0; i
< n
; i
++)
1784 case X86_64_NO_CLASS
:
1786 case X86_64_INTEGER_CLASS
:
1787 case X86_64_INTEGERSI_CLASS
:
1788 /* Merge TImodes on aligned occasions here too. */
1789 if (i
* 8 + 8 > bytes
)
1790 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
1791 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
1795 /* We've requested 24 bytes we don't have mode for. Use DImode. */
1796 if (tmpmode
== BLKmode
)
1798 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
1799 gen_rtx_REG (tmpmode
, *intreg
),
1803 case X86_64_SSESF_CLASS
:
1804 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
1805 gen_rtx_REG (SFmode
,
1806 SSE_REGNO (sse_regno
)),
1810 case X86_64_SSEDF_CLASS
:
1811 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
1812 gen_rtx_REG (DFmode
,
1813 SSE_REGNO (sse_regno
)),
1817 case X86_64_SSE_CLASS
:
1818 if (i
< n
&& class[i
+ 1] == X86_64_SSEUP_CLASS
)
1819 tmpmode
= TImode
, i
++;
1822 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
1823 gen_rtx_REG (tmpmode
,
1824 SSE_REGNO (sse_regno
)),
1832 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
1833 for (i
= 0; i
< nexps
; i
++)
1834 XVECEXP (ret
, 0, i
) = exp
[i
];
1838 /* Update the data in CUM to advance over an argument
1839 of mode MODE and data type TYPE.
1840 (TYPE is null for libcalls where that information may not be available.) */
1843 function_arg_advance (cum
, mode
, type
, named
)
1844 CUMULATIVE_ARGS
*cum
; /* current arg information */
1845 enum machine_mode mode
; /* current arg mode */
1846 tree type
; /* type of the argument or 0 if lib support */
1847 int named
; /* whether or not the argument was named */
1850 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1851 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1853 if (TARGET_DEBUG_ARG
)
1855 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
1856 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
1859 int int_nregs
, sse_nregs
;
1860 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
1861 cum
->words
+= words
;
1862 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
1864 cum
->nregs
-= int_nregs
;
1865 cum
->sse_nregs
-= sse_nregs
;
1866 cum
->regno
+= int_nregs
;
1867 cum
->sse_regno
+= sse_nregs
;
1870 cum
->words
+= words
;
1874 if (TARGET_SSE
&& mode
== TImode
)
1876 cum
->sse_words
+= words
;
1877 cum
->sse_nregs
-= 1;
1878 cum
->sse_regno
+= 1;
1879 if (cum
->sse_nregs
<= 0)
1887 cum
->words
+= words
;
1888 cum
->nregs
-= words
;
1889 cum
->regno
+= words
;
1891 if (cum
->nregs
<= 0)
1901 /* Define where to put the arguments to a function.
1902 Value is zero to push the argument on the stack,
1903 or a hard register in which to store the argument.
1905 MODE is the argument's machine mode.
1906 TYPE is the data type of the argument (as a tree).
1907 This is null for libcalls where that information may
1909 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1910 the preceding args and about the function being called.
1911 NAMED is nonzero if this argument is a named parameter
1912 (otherwise it is an extra parameter matching an ellipsis). */
1915 function_arg (cum
, mode
, type
, named
)
1916 CUMULATIVE_ARGS
*cum
; /* current arg information */
1917 enum machine_mode mode
; /* current arg mode */
1918 tree type
; /* type of the argument or 0 if lib support */
1919 int named
; /* != 0 for normal args, == 0 for ... args */
1923 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1924 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1926 /* Handle a hidden AL argument containing the number of registers for varargs
1927 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
1929 if (mode
== VOIDmode
)
1932 return GEN_INT (cum
->maybe_vaarg
1933 ? (cum
->sse_nregs
< 0
1941 ret
= construct_container (mode
, type
, 0, cum
->nregs
, cum
->sse_nregs
,
1942 &x86_64_int_parameter_registers
[cum
->regno
],
1947 /* For now, pass fp/complex values on the stack. */
1956 if (words
<= cum
->nregs
)
1957 ret
= gen_rtx_REG (mode
, cum
->regno
);
1961 ret
= gen_rtx_REG (mode
, cum
->sse_regno
);
1965 if (TARGET_DEBUG_ARG
)
1968 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
1969 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
1972 fprintf (stderr
, ", reg=%%e%s", reg_names
[ REGNO(ret
) ]);
1974 fprintf (stderr
, ", stack");
1976 fprintf (stderr
, " )\n");
1982 /* Gives the alignment boundary, in bits, of an argument with the specified mode
1986 ix86_function_arg_boundary (mode
, type
)
1987 enum machine_mode mode
;
1992 return PARM_BOUNDARY
;
1994 align
= TYPE_ALIGN (type
);
1996 align
= GET_MODE_ALIGNMENT (mode
);
1997 if (align
< PARM_BOUNDARY
)
1998 align
= PARM_BOUNDARY
;
2004 /* Return true if N is a possible register number of function value. */
2006 ix86_function_value_regno_p (regno
)
2011 return ((regno
) == 0
2012 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
2013 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
2015 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
2016 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
2017 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
2020 /* Define how to find the value returned by a function.
2021 VALTYPE is the data type of the value (as a tree).
2022 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2023 otherwise, FUNC is 0. */
2025 ix86_function_value (valtype
)
2030 rtx ret
= construct_container (TYPE_MODE (valtype
), valtype
, 1,
2031 REGPARM_MAX
, SSE_REGPARM_MAX
,
2032 x86_64_int_return_registers
, 0);
2033 /* For zero sized structures, construct_container returns NULL, but we need
2034 to keep the rest of the compiler happy by returning a meaningful value. */
2036 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
2040 return gen_rtx_REG (TYPE_MODE (valtype
), VALUE_REGNO (TYPE_MODE (valtype
)));
2043 /* Return false if and only if the type is returned in memory. */
2045 ix86_return_in_memory (type
)
2048 int needed_intregs
, needed_sseregs
;
2051 return !examine_argument (TYPE_MODE (type
), type
, 1,
2052 &needed_intregs
, &needed_sseregs
);
2056 if (TYPE_MODE (type
) == BLKmode
2057 || (VECTOR_MODE_P (TYPE_MODE (type
))
2058 && int_size_in_bytes (type
) == 8)
2059 || (int_size_in_bytes (type
) > 12 && TYPE_MODE (type
) != TImode
2060 && TYPE_MODE (type
) != TFmode
2061 && !VECTOR_MODE_P (TYPE_MODE (type
))))
2067 /* Define how to find the value returned by a library function
2068 assuming the value has mode MODE. */
2070 ix86_libcall_value (mode
)
2071 enum machine_mode mode
;
2081 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
2084 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
2086 return gen_rtx_REG (mode
, 0);
2090 return gen_rtx_REG (mode
, VALUE_REGNO (mode
));
2093 /* Create the va_list data type. */
2096 ix86_build_va_list ()
2098 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
2100 /* For i386 we use plain pointer to argument area. */
2102 return build_pointer_type (char_type_node
);
2104 record
= make_lang_type (RECORD_TYPE
);
2105 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
2107 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
2108 unsigned_type_node
);
2109 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
2110 unsigned_type_node
);
2111 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
2113 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
2116 DECL_FIELD_CONTEXT (f_gpr
) = record
;
2117 DECL_FIELD_CONTEXT (f_fpr
) = record
;
2118 DECL_FIELD_CONTEXT (f_ovf
) = record
;
2119 DECL_FIELD_CONTEXT (f_sav
) = record
;
2121 TREE_CHAIN (record
) = type_decl
;
2122 TYPE_NAME (record
) = type_decl
;
2123 TYPE_FIELDS (record
) = f_gpr
;
2124 TREE_CHAIN (f_gpr
) = f_fpr
;
2125 TREE_CHAIN (f_fpr
) = f_ovf
;
2126 TREE_CHAIN (f_ovf
) = f_sav
;
2128 layout_type (record
);
2130 /* The correct type is an array type of one element. */
2131 return build_array_type (record
, build_index_type (size_zero_node
));
2134 /* Perform any needed actions needed for a function that is receiving a
2135 variable number of arguments.
2139 MODE and TYPE are the mode and type of the current parameter.
2141 PRETEND_SIZE is a variable that should be set to the amount of stack
2142 that must be pushed by the prolog to pretend that our caller pushed
2145 Normally, this macro will push all remaining incoming registers on the
2146 stack and set PRETEND_SIZE to the length of the registers pushed. */
2149 ix86_setup_incoming_varargs (cum
, mode
, type
, pretend_size
, no_rtl
)
2150 CUMULATIVE_ARGS
*cum
;
2151 enum machine_mode mode
;
2153 int *pretend_size ATTRIBUTE_UNUSED
;
2157 CUMULATIVE_ARGS next_cum
;
2158 rtx save_area
= NULL_RTX
, mem
;
2171 /* Indicate to allocate space on the stack for varargs save area. */
2172 ix86_save_varrargs_registers
= 1;
2174 fntype
= TREE_TYPE (current_function_decl
);
2175 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
2176 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
2177 != void_type_node
));
2179 /* For varargs, we do not want to skip the dummy va_dcl argument.
2180 For stdargs, we do want to skip the last named argument. */
2183 function_arg_advance (&next_cum
, mode
, type
, 1);
2186 save_area
= frame_pointer_rtx
;
2188 set
= get_varargs_alias_set ();
2190 for (i
= next_cum
.regno
; i
< ix86_regparm
; i
++)
2192 mem
= gen_rtx_MEM (Pmode
,
2193 plus_constant (save_area
, i
* UNITS_PER_WORD
));
2194 set_mem_alias_set (mem
, set
);
2195 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
2196 x86_64_int_parameter_registers
[i
]));
2199 if (next_cum
.sse_nregs
)
2201 /* Now emit code to save SSE registers. The AX parameter contains number
2202 of SSE parameter regsiters used to call this function. We use
2203 sse_prologue_save insn template that produces computed jump across
2204 SSE saves. We need some preparation work to get this working. */
2206 label
= gen_label_rtx ();
2207 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
2209 /* Compute address to jump to :
2210 label - 5*eax + nnamed_sse_arguments*5 */
2211 tmp_reg
= gen_reg_rtx (Pmode
);
2212 nsse_reg
= gen_reg_rtx (Pmode
);
2213 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
2214 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2215 gen_rtx_MULT (VOIDmode
, nsse_reg
,
2217 if (next_cum
.sse_regno
)
2220 gen_rtx_CONST (DImode
,
2221 gen_rtx_PLUS (DImode
,
2223 GEN_INT (next_cum
.sse_regno
* 4))));
2225 emit_move_insn (nsse_reg
, label_ref
);
2226 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
2228 /* Compute address of memory block we save into. We always use pointer
2229 pointing 127 bytes after first byte to store - this is needed to keep
2230 instruction size limited by 4 bytes. */
2231 tmp_reg
= gen_reg_rtx (Pmode
);
2232 emit_insn (gen_rtx_SET(VOIDmode
, tmp_reg
,
2233 plus_constant (save_area
, 8 * REGPARM_MAX
+ 127)));
2234 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
2235 set_mem_alias_set (mem
, set
);
2237 /* And finally do the dirty job! */
2238 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
, GEN_INT (next_cum
.sse_regno
),
2244 /* Implement va_start. */
2247 ix86_va_start (stdarg_p
, valist
, nextarg
)
2252 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
2253 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2254 tree gpr
, fpr
, ovf
, sav
, t
;
2256 /* Only 64bit target needs something special. */
2259 std_expand_builtin_va_start (stdarg_p
, valist
, nextarg
);
2263 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2264 f_fpr
= TREE_CHAIN (f_gpr
);
2265 f_ovf
= TREE_CHAIN (f_fpr
);
2266 f_sav
= TREE_CHAIN (f_ovf
);
2268 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2269 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2270 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2271 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2272 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2274 /* Count number of gp and fp argument registers used. */
2275 words
= current_function_args_info
.words
;
2276 n_gpr
= current_function_args_info
.regno
;
2277 n_fpr
= current_function_args_info
.sse_regno
;
2279 if (TARGET_DEBUG_ARG
)
2280 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2281 (int)words
, (int)n_gpr
, (int)n_fpr
);
2283 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
2284 build_int_2 (n_gpr
* 8, 0));
2285 TREE_SIDE_EFFECTS (t
) = 1;
2286 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2288 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
2289 build_int_2 (n_fpr
* 16 + 8*REGPARM_MAX
, 0));
2290 TREE_SIDE_EFFECTS (t
) = 1;
2291 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2293 /* Find the overflow area. */
2294 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
2296 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
2297 build_int_2 (words
* UNITS_PER_WORD
, 0));
2298 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2299 TREE_SIDE_EFFECTS (t
) = 1;
2300 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2302 /* Find the register save area.
2303 Prologue of the function save it right above stack frame. */
2304 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
2305 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
2306 TREE_SIDE_EFFECTS (t
) = 1;
2307 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2310 /* Implement va_arg. */
2312 ix86_va_arg (valist
, type
)
2315 static int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
2316 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2317 tree gpr
, fpr
, ovf
, sav
, t
;
2318 int indirect_p
= 0, size
, rsize
;
2319 rtx lab_false
, lab_over
= NULL_RTX
;
2323 /* Only 64bit target needs something special. */
2326 return std_expand_builtin_va_arg (valist
, type
);
2329 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2330 f_fpr
= TREE_CHAIN (f_gpr
);
2331 f_ovf
= TREE_CHAIN (f_fpr
);
2332 f_sav
= TREE_CHAIN (f_ovf
);
2334 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2335 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2336 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2337 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2338 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2340 size
= int_size_in_bytes (type
);
2341 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2343 container
= construct_container (TYPE_MODE (type
), type
, 0,
2344 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
2346 * Pull the value out of the saved registers ...
2349 addr_rtx
= gen_reg_rtx (Pmode
);
2353 rtx int_addr_rtx
, sse_addr_rtx
;
2354 int needed_intregs
, needed_sseregs
;
2357 lab_over
= gen_label_rtx ();
2358 lab_false
= gen_label_rtx ();
2360 examine_argument (TYPE_MODE (type
), type
, 0,
2361 &needed_intregs
, &needed_sseregs
);
2364 need_temp
= ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
2365 || TYPE_ALIGN (type
) > 128);
2367 /* In case we are passing structure, verify that it is consetuctive block
2368 on the register save area. If not we need to do moves. */
2369 if (!need_temp
&& !REG_P (container
))
2371 /* Verify that all registers are strictly consetuctive */
2372 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
2376 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2378 rtx slot
= XVECEXP (container
, 0, i
);
2379 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int)i
2380 || INTVAL (XEXP (slot
, 1)) != i
* 16)
2388 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2390 rtx slot
= XVECEXP (container
, 0, i
);
2391 if (REGNO (XEXP (slot
, 0)) != (unsigned int)i
2392 || INTVAL (XEXP (slot
, 1)) != i
* 8)
2399 int_addr_rtx
= addr_rtx
;
2400 sse_addr_rtx
= addr_rtx
;
2404 int_addr_rtx
= gen_reg_rtx (Pmode
);
2405 sse_addr_rtx
= gen_reg_rtx (Pmode
);
2407 /* First ensure that we fit completely in registers. */
2410 emit_cmp_and_jump_insns (expand_expr
2411 (gpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2412 GEN_INT ((REGPARM_MAX
- needed_intregs
+
2413 1) * 8), GE
, const1_rtx
, SImode
,
2418 emit_cmp_and_jump_insns (expand_expr
2419 (fpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2420 GEN_INT ((SSE_REGPARM_MAX
-
2421 needed_sseregs
+ 1) * 16 +
2422 REGPARM_MAX
* 8), GE
, const1_rtx
,
2423 SImode
, 1, 1, lab_false
);
2426 /* Compute index to start of area used for integer regs. */
2429 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, gpr
);
2430 r
= expand_expr (t
, int_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2431 if (r
!= int_addr_rtx
)
2432 emit_move_insn (int_addr_rtx
, r
);
2436 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, fpr
);
2437 r
= expand_expr (t
, sse_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2438 if (r
!= sse_addr_rtx
)
2439 emit_move_insn (sse_addr_rtx
, r
);
2446 mem
= assign_temp (type
, 0, 1, 0);
2447 set_mem_alias_set (mem
, get_varargs_alias_set ());
2448 addr_rtx
= XEXP (mem
, 0);
2449 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
2451 rtx slot
= XVECEXP (container
, 0, i
);
2452 rtx reg
= XEXP (slot
, 0);
2453 enum machine_mode mode
= GET_MODE (reg
);
2459 if (SSE_REGNO_P (REGNO (reg
)))
2461 src_addr
= sse_addr_rtx
;
2462 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
2466 src_addr
= int_addr_rtx
;
2467 src_offset
= REGNO (reg
) * 8;
2469 src_mem
= gen_rtx_MEM (mode
, src_addr
);
2470 set_mem_alias_set (src_mem
, get_varargs_alias_set ());
2471 src_mem
= adjust_address (src_mem
, mode
, src_offset
);
2472 dest_mem
= adjust_address (mem
, mode
, INTVAL (XEXP (slot
, 1)));
2473 PUT_MODE (dest_mem
, mode
);
2474 /* ??? Break out TImode moves from integer registers? */
2475 emit_move_insn (dest_mem
, src_mem
);
2482 build (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
2483 build_int_2 (needed_intregs
* 8, 0));
2484 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
2485 TREE_SIDE_EFFECTS (t
) = 1;
2486 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2491 build (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
2492 build_int_2 (needed_sseregs
* 16, 0));
2493 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
2494 TREE_SIDE_EFFECTS (t
) = 1;
2495 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2498 emit_jump_insn (gen_jump (lab_over
));
2500 emit_label (lab_false
);
2503 /* ... otherwise out of the overflow area. */
2505 /* Care for on-stack alignment if needed. */
2506 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
2510 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
2511 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
, build_int_2 (align
- 1, 0));
2512 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
, build_int_2 (-align
, -1));
2516 r
= expand_expr (t
, addr_rtx
, Pmode
, EXPAND_NORMAL
);
2518 emit_move_insn (addr_rtx
, r
);
2521 build (PLUS_EXPR
, TREE_TYPE (t
), t
,
2522 build_int_2 (rsize
* UNITS_PER_WORD
, 0));
2523 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2524 TREE_SIDE_EFFECTS (t
) = 1;
2525 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2528 emit_label (lab_over
);
2533 r
= gen_rtx_MEM (Pmode
, addr_rtx
);
2534 set_mem_alias_set (r
, get_varargs_alias_set ());
2535 emit_move_insn (addr_rtx
, r
);
2541 /* Return nonzero if OP is general operand representable on x86_64. */
2544 x86_64_general_operand (op
, mode
)
2546 enum machine_mode mode
;
2549 return general_operand (op
, mode
);
2550 if (nonimmediate_operand (op
, mode
))
2552 return x86_64_sign_extended_value (op
);
2555 /* Return nonzero if OP is general operand representable on x86_64
2556 as eighter sign extended or zero extended constant. */
2559 x86_64_szext_general_operand (op
, mode
)
2561 enum machine_mode mode
;
2564 return general_operand (op
, mode
);
2565 if (nonimmediate_operand (op
, mode
))
2567 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
2570 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2573 x86_64_nonmemory_operand (op
, mode
)
2575 enum machine_mode mode
;
2578 return nonmemory_operand (op
, mode
);
2579 if (register_operand (op
, mode
))
2581 return x86_64_sign_extended_value (op
);
2584 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2587 x86_64_movabs_operand (op
, mode
)
2589 enum machine_mode mode
;
2591 if (!TARGET_64BIT
|| !flag_pic
)
2592 return nonmemory_operand (op
, mode
);
2593 if (register_operand (op
, mode
) || x86_64_sign_extended_value (op
))
2595 if (CONSTANT_P (op
) && !symbolic_reference_mentioned_p (op
))
2600 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2603 x86_64_szext_nonmemory_operand (op
, mode
)
2605 enum machine_mode mode
;
2608 return nonmemory_operand (op
, mode
);
2609 if (register_operand (op
, mode
))
2611 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
2614 /* Return nonzero if OP is immediate operand representable on x86_64. */
2617 x86_64_immediate_operand (op
, mode
)
2619 enum machine_mode mode
;
2622 return immediate_operand (op
, mode
);
2623 return x86_64_sign_extended_value (op
);
2626 /* Return nonzero if OP is immediate operand representable on x86_64. */
2629 x86_64_zext_immediate_operand (op
, mode
)
2631 enum machine_mode mode ATTRIBUTE_UNUSED
;
2633 return x86_64_zero_extended_value (op
);
2636 /* Return nonzero if OP is (const_int 1), else return zero. */
2639 const_int_1_operand (op
, mode
)
2641 enum machine_mode mode ATTRIBUTE_UNUSED
;
2643 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) == 1);
2646 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2647 reference and a constant. */
2650 symbolic_operand (op
, mode
)
2652 enum machine_mode mode ATTRIBUTE_UNUSED
;
2654 switch (GET_CODE (op
))
2662 if (GET_CODE (op
) == SYMBOL_REF
2663 || GET_CODE (op
) == LABEL_REF
2664 || (GET_CODE (op
) == UNSPEC
2665 && (XINT (op
, 1) == 6
2666 || XINT (op
, 1) == 7
2667 || XINT (op
, 1) == 15)))
2669 if (GET_CODE (op
) != PLUS
2670 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
2674 if (GET_CODE (op
) == SYMBOL_REF
2675 || GET_CODE (op
) == LABEL_REF
)
2677 /* Only @GOTOFF gets offsets. */
2678 if (GET_CODE (op
) != UNSPEC
2679 || XINT (op
, 1) != 7)
2682 op
= XVECEXP (op
, 0, 0);
2683 if (GET_CODE (op
) == SYMBOL_REF
2684 || GET_CODE (op
) == LABEL_REF
)
2693 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2696 pic_symbolic_operand (op
, mode
)
2698 enum machine_mode mode ATTRIBUTE_UNUSED
;
2700 if (GET_CODE (op
) != CONST
)
2705 if (GET_CODE (XEXP (op
, 0)) == UNSPEC
)
2710 if (GET_CODE (op
) == UNSPEC
)
2712 if (GET_CODE (op
) != PLUS
2713 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
2716 if (GET_CODE (op
) == UNSPEC
)
2722 /* Return true if OP is a symbolic operand that resolves locally. */
2725 local_symbolic_operand (op
, mode
)
2727 enum machine_mode mode ATTRIBUTE_UNUSED
;
2729 if (GET_CODE (op
) == LABEL_REF
)
2732 if (GET_CODE (op
) == CONST
2733 && GET_CODE (XEXP (op
, 0)) == PLUS
2734 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
2735 op
= XEXP (XEXP (op
, 0), 0);
2737 if (GET_CODE (op
) != SYMBOL_REF
)
2740 /* These we've been told are local by varasm and encode_section_info
2742 if (CONSTANT_POOL_ADDRESS_P (op
) || SYMBOL_REF_FLAG (op
))
2745 /* There is, however, a not insubstantial body of code in the rest of
2746 the compiler that assumes it can just stick the results of
2747 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2748 /* ??? This is a hack. Should update the body of the compiler to
2749 always create a DECL an invoke ENCODE_SECTION_INFO. */
2750 if (strncmp (XSTR (op
, 0), internal_label_prefix
,
2751 internal_label_prefix_len
) == 0)
2757 /* Test for a valid operand for a call instruction. Don't allow the
2758 arg pointer register or virtual regs since they may decay into
2759 reg + const, which the patterns can't handle. */
2762 call_insn_operand (op
, mode
)
2764 enum machine_mode mode ATTRIBUTE_UNUSED
;
2766 /* Disallow indirect through a virtual register. This leads to
2767 compiler aborts when trying to eliminate them. */
2768 if (GET_CODE (op
) == REG
2769 && (op
== arg_pointer_rtx
2770 || op
== frame_pointer_rtx
2771 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
2772 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
2775 /* Disallow `call 1234'. Due to varying assembler lameness this
2776 gets either rejected or translated to `call .+1234'. */
2777 if (GET_CODE (op
) == CONST_INT
)
2780 /* Explicitly allow SYMBOL_REF even if pic. */
2781 if (GET_CODE (op
) == SYMBOL_REF
)
2784 /* Half-pic doesn't allow anything but registers and constants.
2785 We've just taken care of the later. */
2787 return register_operand (op
, Pmode
);
2789 /* Otherwise we can allow any general_operand in the address. */
2790 return general_operand (op
, Pmode
);
2794 constant_call_address_operand (op
, mode
)
2796 enum machine_mode mode ATTRIBUTE_UNUSED
;
2798 if (GET_CODE (op
) == CONST
2799 && GET_CODE (XEXP (op
, 0)) == PLUS
2800 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
2801 op
= XEXP (XEXP (op
, 0), 0);
2802 return GET_CODE (op
) == SYMBOL_REF
;
2805 /* Match exactly zero and one. */
2808 const0_operand (op
, mode
)
2810 enum machine_mode mode
;
2812 return op
== CONST0_RTX (mode
);
2816 const1_operand (op
, mode
)
2818 enum machine_mode mode ATTRIBUTE_UNUSED
;
2820 return op
== const1_rtx
;
2823 /* Match 2, 4, or 8. Used for leal multiplicands. */
2826 const248_operand (op
, mode
)
2828 enum machine_mode mode ATTRIBUTE_UNUSED
;
2830 return (GET_CODE (op
) == CONST_INT
2831 && (INTVAL (op
) == 2 || INTVAL (op
) == 4 || INTVAL (op
) == 8));
2834 /* True if this is a constant appropriate for an increment or decremenmt. */
2837 incdec_operand (op
, mode
)
2839 enum machine_mode mode ATTRIBUTE_UNUSED
;
2841 /* On Pentium4, the inc and dec operations causes extra dependancy on flag
2842 registers, since carry flag is not set. */
2843 if (TARGET_PENTIUM4
&& !optimize_size
)
2845 return op
== const1_rtx
|| op
== constm1_rtx
;
2848 /* Return nonzero if OP is acceptable as operand of DImode shift
2852 shiftdi_operand (op
, mode
)
2854 enum machine_mode mode ATTRIBUTE_UNUSED
;
2857 return nonimmediate_operand (op
, mode
);
2859 return register_operand (op
, mode
);
2862 /* Return false if this is the stack pointer, or any other fake
2863 register eliminable to the stack pointer. Otherwise, this is
2866 This is used to prevent esp from being used as an index reg.
2867 Which would only happen in pathological cases. */
2870 reg_no_sp_operand (op
, mode
)
2872 enum machine_mode mode
;
2875 if (GET_CODE (t
) == SUBREG
)
2877 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
)
2880 return register_operand (op
, mode
);
2884 mmx_reg_operand (op
, mode
)
2886 enum machine_mode mode ATTRIBUTE_UNUSED
;
2888 return MMX_REG_P (op
);
2891 /* Return false if this is any eliminable register. Otherwise
2895 general_no_elim_operand (op
, mode
)
2897 enum machine_mode mode
;
2900 if (GET_CODE (t
) == SUBREG
)
2902 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
2903 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
2904 || t
== virtual_stack_dynamic_rtx
)
2907 && REGNO (t
) >= FIRST_VIRTUAL_REGISTER
2908 && REGNO (t
) <= LAST_VIRTUAL_REGISTER
)
2911 return general_operand (op
, mode
);
2914 /* Return false if this is any eliminable register. Otherwise
2915 register_operand or const_int. */
2918 nonmemory_no_elim_operand (op
, mode
)
2920 enum machine_mode mode
;
2923 if (GET_CODE (t
) == SUBREG
)
2925 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
2926 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
2927 || t
== virtual_stack_dynamic_rtx
)
2930 return GET_CODE (op
) == CONST_INT
|| register_operand (op
, mode
);
2933 /* Return true if op is a Q_REGS class register. */
2936 q_regs_operand (op
, mode
)
2938 enum machine_mode mode
;
2940 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
2942 if (GET_CODE (op
) == SUBREG
)
2943 op
= SUBREG_REG (op
);
2944 return QI_REG_P (op
);
2947 /* Return true if op is a NON_Q_REGS class register. */
2950 non_q_regs_operand (op
, mode
)
2952 enum machine_mode mode
;
2954 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
2956 if (GET_CODE (op
) == SUBREG
)
2957 op
= SUBREG_REG (op
);
2958 return NON_QI_REG_P (op
);
2961 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
2964 sse_comparison_operator (op
, mode
)
2966 enum machine_mode mode ATTRIBUTE_UNUSED
;
2968 enum rtx_code code
= GET_CODE (op
);
2971 /* Operations supported directly. */
2981 /* These are equivalent to ones above in non-IEEE comparisons. */
2988 return !TARGET_IEEE_FP
;
2993 /* Return 1 if OP is a valid comparison operator in valid mode. */
2995 ix86_comparison_operator (op
, mode
)
2997 enum machine_mode mode
;
2999 enum machine_mode inmode
;
3000 enum rtx_code code
= GET_CODE (op
);
3001 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3003 if (GET_RTX_CLASS (code
) != '<')
3005 inmode
= GET_MODE (XEXP (op
, 0));
3007 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3009 enum rtx_code second_code
, bypass_code
;
3010 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3011 return (bypass_code
== NIL
&& second_code
== NIL
);
3018 if (inmode
== CCmode
|| inmode
== CCGCmode
3019 || inmode
== CCGOCmode
|| inmode
== CCNOmode
)
3022 case LTU
: case GTU
: case LEU
: case ORDERED
: case UNORDERED
: case GEU
:
3023 if (inmode
== CCmode
)
3027 if (inmode
== CCmode
|| inmode
== CCGCmode
|| inmode
== CCNOmode
)
3035 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3038 fcmov_comparison_operator (op
, mode
)
3040 enum machine_mode mode
;
3042 enum machine_mode inmode
;
3043 enum rtx_code code
= GET_CODE (op
);
3044 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3046 if (GET_RTX_CLASS (code
) != '<')
3048 inmode
= GET_MODE (XEXP (op
, 0));
3049 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3051 enum rtx_code second_code
, bypass_code
;
3052 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3053 if (bypass_code
!= NIL
|| second_code
!= NIL
)
3055 code
= ix86_fp_compare_code_to_integer (code
);
3057 /* i387 supports just limited amount of conditional codes. */
3060 case LTU
: case GTU
: case LEU
: case GEU
:
3061 if (inmode
== CCmode
|| inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3064 case ORDERED
: case UNORDERED
:
3072 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3075 promotable_binary_operator (op
, mode
)
3077 enum machine_mode mode ATTRIBUTE_UNUSED
;
3079 switch (GET_CODE (op
))
3082 /* Modern CPUs have same latency for HImode and SImode multiply,
3083 but 386 and 486 do HImode multiply faster. */
3084 return ix86_cpu
> PROCESSOR_I486
;
3096 /* Nearly general operand, but accept any const_double, since we wish
3097 to be able to drop them into memory rather than have them get pulled
3101 cmp_fp_expander_operand (op
, mode
)
3103 enum machine_mode mode
;
3105 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3107 if (GET_CODE (op
) == CONST_DOUBLE
)
3109 return general_operand (op
, mode
);
3112 /* Match an SI or HImode register for a zero_extract. */
3115 ext_register_operand (op
, mode
)
3117 enum machine_mode mode ATTRIBUTE_UNUSED
;
3120 if ((!TARGET_64BIT
|| GET_MODE (op
) != DImode
)
3121 && GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
3124 if (!register_operand (op
, VOIDmode
))
3127 /* Be curefull to accept only registers having upper parts. */
3128 regno
= REG_P (op
) ? REGNO (op
) : REGNO (SUBREG_REG (op
));
3129 return (regno
> LAST_VIRTUAL_REGISTER
|| regno
< 4);
3132 /* Return 1 if this is a valid binary floating-point operation.
3133 OP is the expression matched, and MODE is its mode. */
3136 binary_fp_operator (op
, mode
)
3138 enum machine_mode mode
;
3140 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3143 switch (GET_CODE (op
))
3149 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
3157 mult_operator(op
, mode
)
3159 enum machine_mode mode ATTRIBUTE_UNUSED
;
3161 return GET_CODE (op
) == MULT
;
3165 div_operator(op
, mode
)
3167 enum machine_mode mode ATTRIBUTE_UNUSED
;
3169 return GET_CODE (op
) == DIV
;
3173 arith_or_logical_operator (op
, mode
)
3175 enum machine_mode mode
;
3177 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
3178 && (GET_RTX_CLASS (GET_CODE (op
)) == 'c'
3179 || GET_RTX_CLASS (GET_CODE (op
)) == '2'));
3182 /* Returns 1 if OP is memory operand with a displacement. */
3185 memory_displacement_operand (op
, mode
)
3187 enum machine_mode mode
;
3189 struct ix86_address parts
;
3191 if (! memory_operand (op
, mode
))
3194 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
3197 return parts
.disp
!= NULL_RTX
;
3200 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3201 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3203 ??? It seems likely that this will only work because cmpsi is an
3204 expander, and no actual insns use this. */
3207 cmpsi_operand (op
, mode
)
3209 enum machine_mode mode
;
3211 if (nonimmediate_operand (op
, mode
))
3214 if (GET_CODE (op
) == AND
3215 && GET_MODE (op
) == SImode
3216 && GET_CODE (XEXP (op
, 0)) == ZERO_EXTRACT
3217 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
3218 && GET_CODE (XEXP (XEXP (op
, 0), 2)) == CONST_INT
3219 && INTVAL (XEXP (XEXP (op
, 0), 1)) == 8
3220 && INTVAL (XEXP (XEXP (op
, 0), 2)) == 8
3221 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
3227 /* Returns 1 if OP is memory operand that can not be represented by the
3231 long_memory_operand (op
, mode
)
3233 enum machine_mode mode
;
3235 if (! memory_operand (op
, mode
))
3238 return memory_address_length (op
) != 0;
3241 /* Return nonzero if the rtx is known aligned. */
3244 aligned_operand (op
, mode
)
3246 enum machine_mode mode
;
3248 struct ix86_address parts
;
3250 if (!general_operand (op
, mode
))
3253 /* Registers and immediate operands are always "aligned". */
3254 if (GET_CODE (op
) != MEM
)
3257 /* Don't even try to do any aligned optimizations with volatiles. */
3258 if (MEM_VOLATILE_P (op
))
3263 /* Pushes and pops are only valid on the stack pointer. */
3264 if (GET_CODE (op
) == PRE_DEC
3265 || GET_CODE (op
) == POST_INC
)
3268 /* Decode the address. */
3269 if (! ix86_decompose_address (op
, &parts
))
3272 /* Look for some component that isn't known to be aligned. */
3276 && REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 32)
3281 if (REGNO_POINTER_ALIGN (REGNO (parts
.base
)) < 32)
3286 if (GET_CODE (parts
.disp
) != CONST_INT
3287 || (INTVAL (parts
.disp
) & 3) != 0)
3291 /* Didn't find one -- this must be an aligned address. */
3295 /* Return true if the constant is something that can be loaded with
3296 a special instruction. Only handle 0.0 and 1.0; others are less
3300 standard_80387_constant_p (x
)
3303 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
3305 /* Note that on the 80387, other constants, such as pi, that we should support
3306 too. On some machines, these are much slower to load as standard constant,
3307 than to load from doubles in memory. */
3308 if (x
== CONST0_RTX (GET_MODE (x
)))
3310 if (x
== CONST1_RTX (GET_MODE (x
)))
3315 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3318 standard_sse_constant_p (x
)
3321 if (GET_CODE (x
) != CONST_DOUBLE
)
3323 return (x
== CONST0_RTX (GET_MODE (x
)));
3326 /* Returns 1 if OP contains a symbol reference */
3329 symbolic_reference_mentioned_p (op
)
3332 register const char *fmt
;
3335 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
3338 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
3339 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
3345 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
3346 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
3350 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
3357 /* Return 1 if it is appropriate to emit `ret' instructions in the
3358 body of a function. Do this only if the epilogue is simple, needing a
3359 couple of insns. Prior to reloading, we can't tell how many registers
3360 must be saved, so return 0 then. Return 0 if there is no frame
3361 marker to de-allocate.
3363 If NON_SAVING_SETJMP is defined and true, then it is not possible
3364 for the epilogue to be simple, so return 0. This is a special case
3365 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3366 until final, but jump_optimize may need to know sooner if a
3370 ix86_can_use_return_insn_p ()
3372 struct ix86_frame frame
;
3374 #ifdef NON_SAVING_SETJMP
3375 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
3378 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
3379 if (profile_block_flag
== 2)
3383 if (! reload_completed
|| frame_pointer_needed
)
3386 /* Don't allow more than 32 pop, since that's all we can do
3387 with one instruction. */
3388 if (current_function_pops_args
3389 && current_function_args_size
>= 32768)
3392 ix86_compute_frame_layout (&frame
);
3393 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
3396 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3398 x86_64_sign_extended_value (value
)
3401 switch (GET_CODE (value
))
3403 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3404 to be at least 32 and this all acceptable constants are
3405 represented as CONST_INT. */
3407 if (HOST_BITS_PER_WIDE_INT
== 32)
3411 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (value
), DImode
);
3412 return trunc_int_for_mode (val
, SImode
) == val
;
3416 /* For certain code models, the symbolic references are known to fit. */
3418 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_KERNEL
;
3420 /* For certain code models, the code is near as well. */
3422 return ix86_cmodel
!= CM_LARGE
&& ix86_cmodel
!= CM_SMALL_PIC
;
3424 /* We also may accept the offsetted memory references in certain special
3427 if (GET_CODE (XEXP (value
, 0)) == UNSPEC
3428 && XVECLEN (XEXP (value
, 0), 0) == 1
3429 && XINT (XEXP (value
, 0), 1) == 15)
3431 else if (GET_CODE (XEXP (value
, 0)) == PLUS
)
3433 rtx op1
= XEXP (XEXP (value
, 0), 0);
3434 rtx op2
= XEXP (XEXP (value
, 0), 1);
3435 HOST_WIDE_INT offset
;
3437 if (ix86_cmodel
== CM_LARGE
)
3439 if (GET_CODE (op2
) != CONST_INT
)
3441 offset
= trunc_int_for_mode (INTVAL (op2
), DImode
);
3442 switch (GET_CODE (op1
))
3445 /* For CM_SMALL assume that latest object is 1MB before
3446 end of 31bits boundary. We may also accept pretty
3447 large negative constants knowing that all objects are
3448 in the positive half of address space. */
3449 if (ix86_cmodel
== CM_SMALL
3450 && offset
< 1024*1024*1024
3451 && trunc_int_for_mode (offset
, SImode
) == offset
)
3453 /* For CM_KERNEL we know that all object resist in the
3454 negative half of 32bits address space. We may not
3455 accept negative offsets, since they may be just off
3456 and we may accept pretty large possitive ones. */
3457 if (ix86_cmodel
== CM_KERNEL
3459 && trunc_int_for_mode (offset
, SImode
) == offset
)
3463 /* These conditions are similar to SYMBOL_REF ones, just the
3464 constraints for code models differ. */
3465 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
3466 && offset
< 1024*1024*1024
3467 && trunc_int_for_mode (offset
, SImode
) == offset
)
3469 if (ix86_cmodel
== CM_KERNEL
3471 && trunc_int_for_mode (offset
, SImode
) == offset
)
3484 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3486 x86_64_zero_extended_value (value
)
3489 switch (GET_CODE (value
))
3492 if (HOST_BITS_PER_WIDE_INT
== 32)
3493 return (GET_MODE (value
) == VOIDmode
3494 && !CONST_DOUBLE_HIGH (value
));
3498 if (HOST_BITS_PER_WIDE_INT
== 32)
3499 return INTVAL (value
) >= 0;
3501 return !(INTVAL (value
) & ~(HOST_WIDE_INT
)0xffffffff);
3504 /* For certain code models, the symbolic references are known to fit. */
3506 return ix86_cmodel
== CM_SMALL
;
3508 /* For certain code models, the code is near as well. */
3510 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
;
3512 /* We also may accept the offsetted memory references in certain special
3515 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
3517 rtx op1
= XEXP (XEXP (value
, 0), 0);
3518 rtx op2
= XEXP (XEXP (value
, 0), 1);
3520 if (ix86_cmodel
== CM_LARGE
)
3522 switch (GET_CODE (op1
))
3526 /* For small code model we may accept pretty large positive
3527 offsets, since one bit is available for free. Negative
3528 offsets are limited by the size of NULL pointer area
3529 specified by the ABI. */
3530 if (ix86_cmodel
== CM_SMALL
3531 && GET_CODE (op2
) == CONST_INT
3532 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
3533 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
3536 /* ??? For the kernel, we may accept adjustment of
3537 -0x10000000, since we know that it will just convert
3538 negative address space to positive, but perhaps this
3539 is not worthwhile. */
3542 /* These conditions are similar to SYMBOL_REF ones, just the
3543 constraints for code models differ. */
3544 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
3545 && GET_CODE (op2
) == CONST_INT
3546 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
3547 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
3561 /* Value should be nonzero if functions must have frame pointers.
3562 Zero means the frame pointer need not be set up (and parms may
3563 be accessed via the stack pointer) in functions that seem suitable. */
3566 ix86_frame_pointer_required ()
3568 /* If we accessed previous frames, then the generated code expects
3569 to be able to access the saved ebp value in our frame. */
3570 if (cfun
->machine
->accesses_prev_frame
)
3573 /* Several x86 os'es need a frame pointer for other reasons,
3574 usually pertaining to setjmp. */
3575 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
3578 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3579 the frame pointer by default. Turn it back on now if we've not
3580 got a leaf function. */
3581 if (TARGET_OMIT_LEAF_FRAME_POINTER
&& ! leaf_function_p ())
3587 /* Record that the current function accesses previous call frames. */
3590 ix86_setup_frame_addresses ()
3592 cfun
->machine
->accesses_prev_frame
= 1;
3595 static char pic_label_name
[32];
3597 /* This function generates code for -fpic that loads %ebx with
3598 the return address of the caller and then returns. */
3601 ix86_asm_file_end (file
)
3606 if (! TARGET_DEEP_BRANCH_PREDICTION
|| pic_label_name
[0] == 0)
3609 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
3610 to updating relocations to a section being discarded such that this
3611 doesn't work. Ought to detect this at configure time. */
3613 /* The trick here is to create a linkonce section containing the
3614 pic label thunk, but to refer to it with an internal label.
3615 Because the label is internal, we don't have inter-dso name
3616 binding issues on hosts that don't support ".hidden".
3618 In order to use these macros, however, we must create a fake
3620 if (targetm
.have_named_sections
)
3622 tree decl
= build_decl (FUNCTION_DECL
,
3623 get_identifier ("i686.get_pc_thunk"),
3625 DECL_ONE_ONLY (decl
) = 1;
3626 UNIQUE_SECTION (decl
, 0);
3627 named_section (decl
, NULL
);
3634 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3635 internal (non-global) label that's being emitted, it didn't make
3636 sense to have .type information for local labels. This caused
3637 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3638 me debug info for a label that you're declaring non-global?) this
3639 was changed to call ASM_OUTPUT_LABEL() instead. */
3641 ASM_OUTPUT_LABEL (file
, pic_label_name
);
3643 xops
[0] = pic_offset_table_rtx
;
3644 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
3645 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
3646 output_asm_insn ("ret", xops
);
3650 load_pic_register ()
3657 gotsym
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
3659 if (TARGET_DEEP_BRANCH_PREDICTION
)
3661 if (! pic_label_name
[0])
3662 ASM_GENERATE_INTERNAL_LABEL (pic_label_name
, "LPR", 0);
3663 pclab
= gen_rtx_MEM (QImode
, gen_rtx_SYMBOL_REF (Pmode
, pic_label_name
));
3667 pclab
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
3670 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx
, pclab
));
3672 if (! TARGET_DEEP_BRANCH_PREDICTION
)
3673 emit_insn (gen_popsi1 (pic_offset_table_rtx
));
3675 emit_insn (gen_prologue_set_got (pic_offset_table_rtx
, gotsym
, pclab
));
3678 /* Generate an "push" pattern for input ARG. */
3684 return gen_rtx_SET (VOIDmode
,
3686 gen_rtx_PRE_DEC (Pmode
,
3687 stack_pointer_rtx
)),
3691 /* Return 1 if we need to save REGNO. */
3693 ix86_save_reg (regno
, maybe_eh_return
)
3695 int maybe_eh_return
;
3699 && regno
== PIC_OFFSET_TABLE_REGNUM
3700 && (current_function_uses_pic_offset_table
3701 || current_function_uses_const_pool
3702 || current_function_calls_eh_return
))
3705 if (current_function_calls_eh_return
&& maybe_eh_return
)
3710 unsigned test
= EH_RETURN_DATA_REGNO(i
);
3711 if (test
== INVALID_REGNUM
)
3713 if (test
== (unsigned) regno
)
3718 return (regs_ever_live
[regno
]
3719 && !call_used_regs
[regno
]
3720 && !fixed_regs
[regno
]
3721 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
3724 /* Return number of registers to be saved on the stack. */
3732 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
3733 if (ix86_save_reg (regno
, true))
3738 /* Return the offset between two registers, one to be eliminated, and the other
3739 its replacement, at the start of a routine. */
3742 ix86_initial_elimination_offset (from
, to
)
3746 struct ix86_frame frame
;
3747 ix86_compute_frame_layout (&frame
);
3749 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3750 return frame
.hard_frame_pointer_offset
;
3751 else if (from
== FRAME_POINTER_REGNUM
3752 && to
== HARD_FRAME_POINTER_REGNUM
)
3753 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
3756 if (to
!= STACK_POINTER_REGNUM
)
3758 else if (from
== ARG_POINTER_REGNUM
)
3759 return frame
.stack_pointer_offset
;
3760 else if (from
!= FRAME_POINTER_REGNUM
)
3763 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
3767 /* Fill structure ix86_frame about frame of currently computed function. */
3770 ix86_compute_frame_layout (frame
)
3771 struct ix86_frame
*frame
;
3773 HOST_WIDE_INT total_size
;
3774 int stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
3776 int preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
3777 HOST_WIDE_INT size
= get_frame_size ();
3779 frame
->nregs
= ix86_nsaved_regs ();
3782 /* Skip return value and save base pointer. */
3783 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
3785 frame
->hard_frame_pointer_offset
= offset
;
3787 /* Do some sanity checking of stack_alignment_needed and
3788 preferred_alignment, since the i386 port is the only one using those features
3789 that may break easily. */
3791 if (size
&& !stack_alignment_needed
)
3793 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
3795 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
3797 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
3800 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
3801 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
3803 /* Register save area */
3804 offset
+= frame
->nregs
* UNITS_PER_WORD
;
3807 if (ix86_save_varrargs_registers
)
3809 offset
+= X86_64_VARARGS_SIZE
;
3810 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
3813 frame
->va_arg_size
= 0;
3815 /* Align start of frame for local function. */
3816 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
3817 & -stack_alignment_needed
) - offset
;
3819 offset
+= frame
->padding1
;
3821 /* Frame pointer points here. */
3822 frame
->frame_pointer_offset
= offset
;
3826 /* Add outgoing arguments area. */
3827 if (ACCUMULATE_OUTGOING_ARGS
)
3829 offset
+= current_function_outgoing_args_size
;
3830 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
3833 frame
->outgoing_arguments_size
= 0;
3835 /* Align stack boundary. */
3836 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
3837 & -preferred_alignment
) - offset
;
3839 offset
+= frame
->padding2
;
3841 /* We've reached the end of the stack frame. */
3842 frame
->stack_pointer_offset
= offset
;
3844 /* Size prologue needs to allocate. */
3845 frame
->to_allocate
=
3846 (size
+ frame
->padding1
+ frame
->padding2
3847 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
3849 if (TARGET_64BIT
&& TARGET_RED_ZONE
&& current_function_sp_is_unchanging
3850 && current_function_is_leaf
)
3852 frame
->red_zone_size
= frame
->to_allocate
;
3853 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
3854 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
3857 frame
->red_zone_size
= 0;
3858 frame
->to_allocate
-= frame
->red_zone_size
;
3859 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
3861 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
3862 fprintf (stderr
, "size: %i\n", size
);
3863 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
3864 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
3865 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
3866 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
3867 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
3868 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
3869 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
3870 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
3871 frame
->hard_frame_pointer_offset
);
3872 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
3876 /* Emit code to save registers in the prologue. */
3879 ix86_emit_save_regs ()
3884 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
3885 if (ix86_save_reg (regno
, true))
3887 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
3888 RTX_FRAME_RELATED_P (insn
) = 1;
3892 /* Emit code to save registers using MOV insns. First register
3893 is restored from POINTER + OFFSET. */
3895 ix86_emit_save_regs_using_mov (pointer
, offset
)
3897 HOST_WIDE_INT offset
;
3902 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
3903 if (ix86_save_reg (regno
, true))
3905 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
3907 gen_rtx_REG (Pmode
, regno
));
3908 RTX_FRAME_RELATED_P (insn
) = 1;
3909 offset
+= UNITS_PER_WORD
;
3913 /* Expand the prologue into a bunch of separate insns. */
3916 ix86_expand_prologue ()
3919 int pic_reg_used
= (flag_pic
&& (current_function_uses_pic_offset_table
3920 || current_function_uses_const_pool
)
3922 struct ix86_frame frame
;
3924 HOST_WIDE_INT allocate
;
3928 use_fast_prologue_epilogue
3929 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT
);
3930 if (TARGET_PROLOGUE_USING_MOVE
)
3931 use_mov
= use_fast_prologue_epilogue
;
3933 ix86_compute_frame_layout (&frame
);
3935 /* Note: AT&T enter does NOT have reversed args. Enter is probably
3936 slower on all targets. Also sdb doesn't like it. */
3938 if (frame_pointer_needed
)
3940 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
3941 RTX_FRAME_RELATED_P (insn
) = 1;
3943 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
3944 RTX_FRAME_RELATED_P (insn
) = 1;
3947 allocate
= frame
.to_allocate
;
3948 /* In case we are dealing only with single register and empty frame,
3949 push is equivalent of the mov+add sequence. */
3950 if (allocate
== 0 && frame
.nregs
<= 1)
3954 ix86_emit_save_regs ();
3956 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
3960 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
3962 insn
= emit_insn (gen_pro_epilogue_adjust_stack
3963 (stack_pointer_rtx
, stack_pointer_rtx
,
3964 GEN_INT (-allocate
)));
3965 RTX_FRAME_RELATED_P (insn
) = 1;
3969 /* ??? Is this only valid for Win32? */
3976 arg0
= gen_rtx_REG (SImode
, 0);
3977 emit_move_insn (arg0
, GEN_INT (allocate
));
3979 sym
= gen_rtx_MEM (FUNCTION_MODE
,
3980 gen_rtx_SYMBOL_REF (Pmode
, "_alloca"));
3981 insn
= emit_call_insn (gen_call (sym
, const0_rtx
, constm1_rtx
));
3983 CALL_INSN_FUNCTION_USAGE (insn
)
3984 = gen_rtx_EXPR_LIST (VOIDmode
, gen_rtx_USE (VOIDmode
, arg0
),
3985 CALL_INSN_FUNCTION_USAGE (insn
));
3989 if (!frame_pointer_needed
|| !frame
.to_allocate
)
3990 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
3992 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
3993 -frame
.nregs
* UNITS_PER_WORD
);
3996 #ifdef SUBTARGET_PROLOGUE
4001 load_pic_register ();
4003 /* If we are profiling, make sure no instructions are scheduled before
4004 the call to mcount. However, if -fpic, the above call will have
4006 if ((profile_flag
|| profile_block_flag
) && ! pic_reg_used
)
4007 emit_insn (gen_blockage ());
4010 /* Emit code to restore saved registers using MOV insns. First register
4011 is restored from POINTER + OFFSET. */
4013 ix86_emit_restore_regs_using_mov (pointer
, offset
, maybe_eh_return
)
4016 int maybe_eh_return
;
4020 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4021 if (ix86_save_reg (regno
, maybe_eh_return
))
4023 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
4024 adjust_address (gen_rtx_MEM (Pmode
, pointer
),
4026 offset
+= UNITS_PER_WORD
;
4030 /* Restore function stack, frame, and registers. */
4033 ix86_expand_epilogue (style
)
4037 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
4038 struct ix86_frame frame
;
4039 HOST_WIDE_INT offset
;
4041 ix86_compute_frame_layout (&frame
);
4043 /* Calculate start of saved registers relative to ebp. Special care
4044 must be taken for the normal return case of a function using
4045 eh_return: the eax and edx registers are marked as saved, but not
4046 restored along this path. */
4047 offset
= frame
.nregs
;
4048 if (current_function_calls_eh_return
&& style
!= 2)
4050 offset
*= -UNITS_PER_WORD
;
4052 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
4053 if (profile_block_flag
== 2)
4055 FUNCTION_BLOCK_PROFILER_EXIT
;
4059 /* If we're only restoring one register and sp is not valid then
4060 using a move instruction to restore the register since it's
4061 less work than reloading sp and popping the register.
4063 The default code result in stack adjustment using add/lea instruction,
4064 while this code results in LEAVE instruction (or discrete equivalent),
4065 so it is profitable in some other cases as well. Especially when there
4066 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4067 and there is exactly one register to pop. This heuristic may need some
4068 tuning in future. */
4069 if ((!sp_valid
&& frame
.nregs
<= 1)
4070 || (TARGET_EPILOGUE_USING_MOVE
4071 && use_fast_prologue_epilogue
4072 && (frame
.nregs
> 1 || frame
.to_allocate
))
4073 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
4074 || (frame_pointer_needed
&& TARGET_USE_LEAVE
4075 && use_fast_prologue_epilogue
&& frame
.nregs
== 1)
4076 || current_function_calls_eh_return
)
4078 /* Restore registers. We can use ebp or esp to address the memory
4079 locations. If both are available, default to ebp, since offsets
4080 are known to be small. Only exception is esp pointing directly to the
4081 end of block of saved registers, where we may simplify addressing
4084 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
4085 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
4086 frame
.to_allocate
, style
== 2);
4088 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
4089 offset
, style
== 2);
4091 /* eh_return epilogues need %ecx added to the stack pointer. */
4094 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
4096 if (frame_pointer_needed
)
4098 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
4099 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
4100 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
4102 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
4103 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
4105 emit_insn (gen_pro_epilogue_adjust_stack
4106 (stack_pointer_rtx
, sa
, const0_rtx
));
4110 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
4111 tmp
= plus_constant (tmp
, (frame
.to_allocate
4112 + frame
.nregs
* UNITS_PER_WORD
));
4113 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
4116 else if (!frame_pointer_needed
)
4117 emit_insn (gen_pro_epilogue_adjust_stack
4118 (stack_pointer_rtx
, stack_pointer_rtx
,
4119 GEN_INT (frame
.to_allocate
4120 + frame
.nregs
* UNITS_PER_WORD
)));
4121 /* If not an i386, mov & pop is faster than "leave". */
4122 else if (TARGET_USE_LEAVE
|| optimize_size
|| !use_fast_prologue_epilogue
)
4123 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4126 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
4127 hard_frame_pointer_rtx
,
4130 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4132 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4137 /* First step is to deallocate the stack frame so that we can
4138 pop the registers. */
4141 if (!frame_pointer_needed
)
4143 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
4144 hard_frame_pointer_rtx
,
4147 else if (frame
.to_allocate
)
4148 emit_insn (gen_pro_epilogue_adjust_stack
4149 (stack_pointer_rtx
, stack_pointer_rtx
,
4150 GEN_INT (frame
.to_allocate
)));
4152 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4153 if (ix86_save_reg (regno
, false))
4156 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
4158 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
4160 if (frame_pointer_needed
)
4162 /* Leave results in shorter dependency chains on CPUs that are
4163 able to grok it fast. */
4164 if (TARGET_USE_LEAVE
)
4165 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4166 else if (TARGET_64BIT
)
4167 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4169 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4173 /* Sibcall epilogues don't want a return instruction. */
4177 if (current_function_pops_args
&& current_function_args_size
)
4179 rtx popc
= GEN_INT (current_function_pops_args
);
4181 /* i386 can only pop 64K bytes. If asked to pop more, pop
4182 return address, do explicit add, and jump indirectly to the
4185 if (current_function_pops_args
>= 65536)
4187 rtx ecx
= gen_rtx_REG (SImode
, 2);
4189 /* There is no "pascal" calling convention in 64bit ABI. */
4193 emit_insn (gen_popsi1 (ecx
));
4194 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
4195 emit_jump_insn (gen_return_indirect_internal (ecx
));
4198 emit_jump_insn (gen_return_pop_internal (popc
));
4201 emit_jump_insn (gen_return_internal ());
4204 /* Extract the parts of an RTL expression that is a valid memory address
4205 for an instruction. Return false if the structure of the address is
4209 ix86_decompose_address (addr
, out
)
4211 struct ix86_address
*out
;
4213 rtx base
= NULL_RTX
;
4214 rtx index
= NULL_RTX
;
4215 rtx disp
= NULL_RTX
;
4216 HOST_WIDE_INT scale
= 1;
4217 rtx scale_rtx
= NULL_RTX
;
4219 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == SUBREG
)
4221 else if (GET_CODE (addr
) == PLUS
)
4223 rtx op0
= XEXP (addr
, 0);
4224 rtx op1
= XEXP (addr
, 1);
4225 enum rtx_code code0
= GET_CODE (op0
);
4226 enum rtx_code code1
= GET_CODE (op1
);
4228 if (code0
== REG
|| code0
== SUBREG
)
4230 if (code1
== REG
|| code1
== SUBREG
)
4231 index
= op0
, base
= op1
; /* index + base */
4233 base
= op0
, disp
= op1
; /* base + displacement */
4235 else if (code0
== MULT
)
4237 index
= XEXP (op0
, 0);
4238 scale_rtx
= XEXP (op0
, 1);
4239 if (code1
== REG
|| code1
== SUBREG
)
4240 base
= op1
; /* index*scale + base */
4242 disp
= op1
; /* index*scale + disp */
4244 else if (code0
== PLUS
&& GET_CODE (XEXP (op0
, 0)) == MULT
)
4246 index
= XEXP (XEXP (op0
, 0), 0); /* index*scale + base + disp */
4247 scale_rtx
= XEXP (XEXP (op0
, 0), 1);
4248 base
= XEXP (op0
, 1);
4251 else if (code0
== PLUS
)
4253 index
= XEXP (op0
, 0); /* index + base + disp */
4254 base
= XEXP (op0
, 1);
4260 else if (GET_CODE (addr
) == MULT
)
4262 index
= XEXP (addr
, 0); /* index*scale */
4263 scale_rtx
= XEXP (addr
, 1);
4265 else if (GET_CODE (addr
) == ASHIFT
)
4269 /* We're called for lea too, which implements ashift on occasion. */
4270 index
= XEXP (addr
, 0);
4271 tmp
= XEXP (addr
, 1);
4272 if (GET_CODE (tmp
) != CONST_INT
)
4274 scale
= INTVAL (tmp
);
4275 if ((unsigned HOST_WIDE_INT
) scale
> 3)
4280 disp
= addr
; /* displacement */
4282 /* Extract the integral value of scale. */
4285 if (GET_CODE (scale_rtx
) != CONST_INT
)
4287 scale
= INTVAL (scale_rtx
);
4290 /* Allow arg pointer and stack pointer as index if there is not scaling */
4291 if (base
&& index
&& scale
== 1
4292 && (index
== arg_pointer_rtx
|| index
== frame_pointer_rtx
4293 || index
== stack_pointer_rtx
))
4300 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4301 if ((base
== hard_frame_pointer_rtx
4302 || base
== frame_pointer_rtx
4303 || base
== arg_pointer_rtx
) && !disp
)
4306 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4307 Avoid this by transforming to [%esi+0]. */
4308 if (ix86_cpu
== PROCESSOR_K6
&& !optimize_size
4309 && base
&& !index
&& !disp
4311 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
4314 /* Special case: encode reg+reg instead of reg*2. */
4315 if (!base
&& index
&& scale
&& scale
== 2)
4316 base
= index
, scale
= 1;
4318 /* Special case: scaling cannot be encoded without base or displacement. */
4319 if (!base
&& !disp
&& index
&& scale
!= 1)
4330 /* Return cost of the memory address x.
4331 For i386, it is better to use a complex address than let gcc copy
4332 the address into a reg and make a new pseudo. But not if the address
4333 requires two regs - that would mean more pseudos with longer
4336 ix86_address_cost (x
)
4339 struct ix86_address parts
;
4342 if (!ix86_decompose_address (x
, &parts
))
4345 /* More complex memory references are better. */
4346 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
4349 /* Attempt to minimize number of registers in the address. */
4351 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
4353 && (!REG_P (parts
.index
)
4354 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
4358 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
4360 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
4361 && parts
.base
!= parts
.index
)
4364 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4365 since its predecode logic can't detect the length of instructions
4366 and it degenerates to vector decoded. Increase cost of such
4367 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4368 to split such addresses or even refuse such addresses at all.
4370 Following addressing modes are affected:
4375 The first and last case may be avoidable by explicitly coding the zero in
4376 memory address, but I don't have AMD-K6 machine handy to check this
4380 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4381 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4382 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
4388 /* If X is a machine specific address (i.e. a symbol or label being
4389 referenced as a displacement from the GOT implemented using an
4390 UNSPEC), then return the base term. Otherwise return X. */
4393 ix86_find_base_term (x
)
4400 if (GET_CODE (x
) != CONST
)
4403 if (GET_CODE (term
) == PLUS
4404 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
4405 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
4406 term
= XEXP (term
, 0);
4407 if (GET_CODE (term
) != UNSPEC
4408 || XVECLEN (term
, 0) != 1
4409 || XINT (term
, 1) != 15)
4412 term
= XVECEXP (term
, 0, 0);
4414 if (GET_CODE (term
) != SYMBOL_REF
4415 && GET_CODE (term
) != LABEL_REF
)
4421 if (GET_CODE (x
) != PLUS
4422 || XEXP (x
, 0) != pic_offset_table_rtx
4423 || GET_CODE (XEXP (x
, 1)) != CONST
)
4426 term
= XEXP (XEXP (x
, 1), 0);
4428 if (GET_CODE (term
) == PLUS
&& GET_CODE (XEXP (term
, 1)) == CONST_INT
)
4429 term
= XEXP (term
, 0);
4431 if (GET_CODE (term
) != UNSPEC
4432 || XVECLEN (term
, 0) != 1
4433 || XINT (term
, 1) != 7)
4436 term
= XVECEXP (term
, 0, 0);
4438 if (GET_CODE (term
) != SYMBOL_REF
4439 && GET_CODE (term
) != LABEL_REF
)
4445 /* Determine if a given CONST RTX is a valid memory displacement
4449 legitimate_pic_address_disp_p (disp
)
4452 /* In 64bit mode we can allow direct addresses of symbols and labels
4453 when they are not dynamic symbols. */
4457 if (GET_CODE (disp
) == CONST
)
4459 /* ??? Handle PIC code models */
4460 if (GET_CODE (x
) == PLUS
4461 && (GET_CODE (XEXP (x
, 1)) == CONST_INT
4462 && ix86_cmodel
== CM_SMALL_PIC
4463 && INTVAL (XEXP (x
, 1)) < 1024*1024*1024
4464 && INTVAL (XEXP (x
, 1)) > -1024*1024*1024))
4466 if (local_symbolic_operand (x
, Pmode
))
4469 if (GET_CODE (disp
) != CONST
)
4471 disp
= XEXP (disp
, 0);
4475 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
4476 of GOT table references. We should not need these anyway. */
4477 if (GET_CODE (disp
) != UNSPEC
4478 || XVECLEN (disp
, 0) != 1
4479 || XINT (disp
, 1) != 15)
4482 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
4483 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
4488 if (GET_CODE (disp
) == PLUS
)
4490 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
4492 disp
= XEXP (disp
, 0);
4495 if (GET_CODE (disp
) != UNSPEC
4496 || XVECLEN (disp
, 0) != 1)
4499 /* Must be @GOT or @GOTOFF. */
4500 switch (XINT (disp
, 1))
4503 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
4505 case 7: /* @GOTOFF */
4506 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
4512 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4513 memory address for an instruction. The MODE argument is the machine mode
4514 for the MEM expression that wants to use this address.
4516 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
4517 convert common non-canonical forms to canonical form so that they will
4521 legitimate_address_p (mode
, addr
, strict
)
4522 enum machine_mode mode
;
4526 struct ix86_address parts
;
4527 rtx base
, index
, disp
;
4528 HOST_WIDE_INT scale
;
4529 const char *reason
= NULL
;
4530 rtx reason_rtx
= NULL_RTX
;
4532 if (TARGET_DEBUG_ADDR
)
4535 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4536 GET_MODE_NAME (mode
), strict
);
4540 if (! ix86_decompose_address (addr
, &parts
))
4542 reason
= "decomposition failed";
4547 index
= parts
.index
;
4549 scale
= parts
.scale
;
4551 /* Validate base register.
4553 Don't allow SUBREG's here, it can lead to spill failures when the base
4554 is one word out of a two word structure, which is represented internally
4561 if (GET_CODE (base
) != REG
)
4563 reason
= "base is not a register";
4567 if (GET_MODE (base
) != Pmode
)
4569 reason
= "base is not in Pmode";
4573 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (base
))
4574 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (base
)))
4576 reason
= "base is not valid";
4581 /* Validate index register.
4583 Don't allow SUBREG's here, it can lead to spill failures when the index
4584 is one word out of a two word structure, which is represented internally
4591 if (GET_CODE (index
) != REG
)
4593 reason
= "index is not a register";
4597 if (GET_MODE (index
) != Pmode
)
4599 reason
= "index is not in Pmode";
4603 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (index
))
4604 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (index
)))
4606 reason
= "index is not valid";
4611 /* Validate scale factor. */
4614 reason_rtx
= GEN_INT (scale
);
4617 reason
= "scale without index";
4621 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
4623 reason
= "scale is not a valid multiplier";
4628 /* Validate displacement. */
4633 if (!CONSTANT_ADDRESS_P (disp
))
4635 reason
= "displacement is not constant";
4641 if (!x86_64_sign_extended_value (disp
))
4643 reason
= "displacement is out of range";
4649 if (GET_CODE (disp
) == CONST_DOUBLE
)
4651 reason
= "displacement is a const_double";
4656 if (flag_pic
&& SYMBOLIC_CONST (disp
))
4658 if (TARGET_64BIT
&& (index
|| base
))
4660 reason
= "non-constant pic memory reference";
4663 if (! legitimate_pic_address_disp_p (disp
))
4665 reason
= "displacement is an invalid pic construct";
4669 /* This code used to verify that a symbolic pic displacement
4670 includes the pic_offset_table_rtx register.
4672 While this is a good idea, unfortunately these constructs may
4673 be created by "adds using lea" optimization for incorrect
4682 This code is nonsensical, but results in addressing
4683 GOT table with pic_offset_table_rtx base. We can't
4684 just refuse it easily, since it gets matched by
4685 "addsi3" pattern, that later gets split to lea in the
4686 case output register differs from input. While this
4687 can be handled by separate addsi pattern for this case
4688 that never results in lea, this seems to be easier and
4689 correct fix for crash to disable this test. */
4691 else if (HALF_PIC_P ())
4693 if (! HALF_PIC_ADDRESS_P (disp
)
4694 || (base
!= NULL_RTX
|| index
!= NULL_RTX
))
4696 reason
= "displacement is an invalid half-pic reference";
4702 /* Everything looks valid. */
4703 if (TARGET_DEBUG_ADDR
)
4704 fprintf (stderr
, "Success.\n");
4708 if (TARGET_DEBUG_ADDR
)
4710 fprintf (stderr
, "Error: %s\n", reason
);
4711 debug_rtx (reason_rtx
);
4716 /* Return an unique alias set for the GOT. */
4718 static HOST_WIDE_INT
4719 ix86_GOT_alias_set ()
4721 static HOST_WIDE_INT set
= -1;
4723 set
= new_alias_set ();
4727 /* Return a legitimate reference for ORIG (an address) using the
4728 register REG. If REG is 0, a new pseudo is generated.
4730 There are two types of references that must be handled:
4732 1. Global data references must load the address from the GOT, via
4733 the PIC reg. An insn is emitted to do this load, and the reg is
4736 2. Static data references, constant pool addresses, and code labels
4737 compute the address as an offset from the GOT, whose base is in
4738 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
4739 differentiate them from global data objects. The returned
4740 address is the PIC reg + an unspec constant.
4742 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
4743 reg also appears in the address. */
4746 legitimize_pic_address (orig
, reg
)
4754 if (local_symbolic_operand (addr
, Pmode
))
4756 /* In 64bit mode we can address such objects directly. */
4761 /* This symbol may be referenced via a displacement from the PIC
4762 base address (@GOTOFF). */
4764 current_function_uses_pic_offset_table
= 1;
4765 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), 7);
4766 new = gen_rtx_CONST (Pmode
, new);
4767 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
4771 emit_move_insn (reg
, new);
4776 else if (GET_CODE (addr
) == SYMBOL_REF
)
4780 current_function_uses_pic_offset_table
= 1;
4781 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), 15);
4782 new = gen_rtx_CONST (Pmode
, new);
4783 new = gen_rtx_MEM (Pmode
, new);
4784 RTX_UNCHANGING_P (new) = 1;
4785 set_mem_alias_set (new, ix86_GOT_alias_set ());
4788 reg
= gen_reg_rtx (Pmode
);
4789 /* Use directly gen_movsi, otherwise the address is loaded
4790 into a register for CSE. We don't want to CSE these addresses,
4791 instead we CSE addresses from the GOT table, so skip this. */
4792 emit_insn (gen_movsi (reg
, new));
4797 /* This symbol must be referenced via a load from the
4798 Global Offset Table (@GOT). */
4800 current_function_uses_pic_offset_table
= 1;
4801 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), 6);
4802 new = gen_rtx_CONST (Pmode
, new);
4803 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
4804 new = gen_rtx_MEM (Pmode
, new);
4805 RTX_UNCHANGING_P (new) = 1;
4806 set_mem_alias_set (new, ix86_GOT_alias_set ());
4809 reg
= gen_reg_rtx (Pmode
);
4810 emit_move_insn (reg
, new);
4816 if (GET_CODE (addr
) == CONST
)
4818 addr
= XEXP (addr
, 0);
4819 if (GET_CODE (addr
) == UNSPEC
)
4821 /* Check that the unspec is one of the ones we generate? */
4823 else if (GET_CODE (addr
) != PLUS
)
4826 if (GET_CODE (addr
) == PLUS
)
4828 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
4830 /* Check first to see if this is a constant offset from a @GOTOFF
4831 symbol reference. */
4832 if (local_symbolic_operand (op0
, Pmode
)
4833 && GET_CODE (op1
) == CONST_INT
)
4837 current_function_uses_pic_offset_table
= 1;
4838 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
), 7);
4839 new = gen_rtx_PLUS (Pmode
, new, op1
);
4840 new = gen_rtx_CONST (Pmode
, new);
4841 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
4845 emit_move_insn (reg
, new);
4851 /* ??? We need to limit offsets here. */
4856 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
4857 new = legitimize_pic_address (XEXP (addr
, 1),
4858 base
== reg
? NULL_RTX
: reg
);
4860 if (GET_CODE (new) == CONST_INT
)
4861 new = plus_constant (base
, INTVAL (new));
4864 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
4866 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
4867 new = XEXP (new, 1);
4869 new = gen_rtx_PLUS (Pmode
, base
, new);
4877 /* Try machine-dependent ways of modifying an illegitimate address
4878 to be legitimate. If we find one, return the new, valid address.
4879 This macro is used in only one place: `memory_address' in explow.c.
4881 OLDX is the address as it was before break_out_memory_refs was called.
4882 In some cases it is useful to look at this to decide what needs to be done.
4884 MODE and WIN are passed so that this macro can use
4885 GO_IF_LEGITIMATE_ADDRESS.
4887 It is always safe for this macro to do nothing. It exists to recognize
4888 opportunities to optimize the output.
4890 For the 80386, we handle X+REG by loading X into a register R and
4891 using R+REG. R will go in a general reg and indexing will be used.
4892 However, if REG is a broken-out memory address or multiplication,
4893 nothing needs to be done because REG can certainly go in a general reg.
4895 When -fpic is used, special handling is needed for symbolic references.
4896 See comments by legitimize_pic_address in i386.c for details. */
/* NOTE(review): mangled extraction -- interior lines (return type, the
   declarations of X, LOG and CHANGED, braces, returns) are missing.
   Code left byte-identical; comments only.
   Per the preceding file comment: try machine-dependent ways of turning an
   illegitimate address X into a legitimate one; called from memory_address
   via LEGITIMIZE_ADDRESS.  OLDX is the pre-break_out_memory_refs form.  */
4899 legitimize_address (x
, oldx
, mode
)
4901 register rtx oldx ATTRIBUTE_UNUSED
;
4902 enum machine_mode mode
;
4907 if (TARGET_DEBUG_ADDR
)
4909 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
4910 GET_MODE_NAME (mode
));
/* Symbolic constants under -fpic go through the PIC legitimizer.  */
4914 if (flag_pic
&& SYMBOLIC_CONST (x
))
4915 return legitimize_pic_address (x
, 0);
4917 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
4918 if (GET_CODE (x
) == ASHIFT
4919 && GET_CODE (XEXP (x
, 1)) == CONST_INT
4920 && (log
= (unsigned)exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
4923 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
4924 GEN_INT (1 << log
));
4927 if (GET_CODE (x
) == PLUS
)
4929 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
4931 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
4932 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
4933 && (log
= (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
4936 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
4937 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
4938 GEN_INT (1 << log
));
4941 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
4942 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
4943 && (log
= (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
4946 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
4947 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
4948 GEN_INT (1 << log
));
4951 /* Put multiply first if it isn't already. */
4952 if (GET_CODE (XEXP (x
, 1)) == MULT
)
4954 rtx tmp
= XEXP (x
, 0);
4955 XEXP (x
, 0) = XEXP (x
, 1);
4960 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
4961 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
4962 created by virtual register instantiation, register elimination, and
4963 similar optimizations. */
4964 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
4967 x
= gen_rtx_PLUS (Pmode
,
4968 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
4969 XEXP (XEXP (x
, 1), 0)),
4970 XEXP (XEXP (x
, 1), 1));
4974 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
4975 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
4976 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
4977 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
4978 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
4979 && CONSTANT_P (XEXP (x
, 1)))
4982 rtx other
= NULL_RTX
;
/* Fold whichever side carries the CONST_INT into OTHER + CONSTANT.  */
4984 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
4986 constant
= XEXP (x
, 1);
4987 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
4989 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
4991 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
4992 other
= XEXP (x
, 1);
5000 x
= gen_rtx_PLUS (Pmode
,
5001 gen_rtx_PLUS (Pmode
, XEXP (x
, 0), 0),
5002 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
5003 plus_constant (other
, INTVAL (constant
)));
/* If the canonicalizations above already yielded a valid address, done.  */
5007 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
/* Force any remaining MULT sub-terms into registers.  */
5010 if (GET_CODE (XEXP (x
, 0)) == MULT
)
5013 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
5016 if (GET_CODE (XEXP (x
, 1)) == MULT
)
5019 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
5023 && GET_CODE (XEXP (x
, 1)) == REG
5024 && GET_CODE (XEXP (x
, 0)) == REG
)
5027 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
5030 x
= legitimize_pic_address (x
, 0);
5033 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
/* Last resort: load one side into a fresh pseudo and re-add.  */
5036 if (GET_CODE (XEXP (x
, 0)) == REG
)
5038 register rtx temp
= gen_reg_rtx (Pmode
);
5039 register rtx val
= force_operand (XEXP (x
, 1), temp
);
5041 emit_move_insn (temp
, val
);
5047 else if (GET_CODE (XEXP (x
, 1)) == REG
)
5049 register rtx temp
= gen_reg_rtx (Pmode
);
5050 register rtx val
= force_operand (XEXP (x
, 0), temp
);
5052 emit_move_insn (temp
, val
);
5062 /* Print an integer constant expression in assembler syntax. Addition
5063 and subtraction are the only arithmetic that may appear in these
5064 expressions. FILE is the stdio stream to write to, X is the rtx, and
5065 CODE is the operand print code from the output string. */
/* NOTE(review): mangled extraction -- the case labels of both switches
   (SYMBOL_REF, LABEL_REF, CONST_INT, CONST, CONST_DOUBLE, PLUS, MINUS,
   UNSPEC by the visible bodies; and the UNSPEC sub-codes) are missing
   from this view.  Code left byte-identical; comments only.
   Per the preceding file comment: print integer constant X in assembler
   syntax to FILE; CODE is the operand print code.  */
5068 output_pic_addr_const (file
, x
, code
)
5075 switch (GET_CODE (x
))
/* SYMBOL_REF: plain name; 'P' adds @PLT for non-static symbols
   (SYMBOL_REF_FLAG marks static data per the file-head comment).  */
5085 assemble_name (file
, XSTR (x
, 0));
5086 if (code
== 'P' && ! SYMBOL_REF_FLAG (x
))
5087 fputs ("@PLT", file
);
/* CODE_LABEL: emit the internal "L" label name.  */
5094 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
5095 assemble_name (asm_out_file
, buf
);
5099 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
5103 /* This used to output parentheses around the expression,
5104 but that does not work on the 386 (either ATT or BSD assembler). */
5105 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5109 if (GET_MODE (x
) == VOIDmode
)
5111 /* We can use %d if the number is <32 bits and positive. */
5112 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
5113 fprintf (file
, "0x%lx%08lx",
5114 (unsigned long) CONST_DOUBLE_HIGH (x
),
5115 (unsigned long) CONST_DOUBLE_LOW (x
));
5117 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
5120 /* We can't handle floating point constants;
5121 PRINT_OPERAND must handle them. */
5122 output_operand_lossage ("floating constant misused");
5126 /* Some assemblers need integer constants to appear first. */
5127 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
5129 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5131 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5133 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5135 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5137 output_pic_addr_const (file
, XEXP (x
, 0), code
);
/* Difference: bracket per dialect (AT&T uses (), Intel []).  */
5144 putc (ASSEMBLER_DIALECT
? '(' : '[', file
);
5145 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5147 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5148 putc (ASSEMBLER_DIALECT
? ')' : ']', file
);
/* UNSPEC: single-element vector, then the relocation suffix.  */
5152 if (XVECLEN (x
, 0) != 1)
5154 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
5155 switch (XINT (x
, 1))
5158 fputs ("@GOT", file
);
5161 fputs ("@GOTOFF", file
);
5164 fputs ("@PLT", file
);
5167 fputs ("@GOTPCREL(%RIP)", file
);
5170 output_operand_lossage ("invalid UNSPEC as operand");
5176 output_operand_lossage ("invalid expression as operand");
5180 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5181 We need to handle our special PIC relocations. */
/* NOTE(review): mangled extraction -- the branch structure around the two
   fprintf calls (presumably an #ifdef/if on TARGET_64BIT availability of
   ASM_QUAD) is missing.  Code left byte-identical; comments only.
   Per the preceding comment: called from dwarfout.c via
   ASM_OUTPUT_DWARF_ADDR_CONST; emits the address directive then X,
   using the PIC-aware printer under -fpic.  */
5184 i386_dwarf_output_addr_const (file
, x
)
5189 fprintf (file
, "%s", TARGET_64BIT
? ASM_QUAD
: INT_ASM_OP
);
5193 fprintf (file
, "%s", INT_ASM_OP
);
5196 output_pic_addr_const (file
, x
, '\0');
5198 output_addr_const (file
, x
);
5202 /* In the name of slightly smaller debug output, and to cater to
5203 general assembler losage, recognize PIC+GOTOFF and turn it back
5204 into a direct symbol reference. */
/* NOTE(review): mangled extraction -- the declaration/initialization of X
   from ORIG_X, the TARGET_64BIT guard implied by the unspec-15 (@GOTPCREL)
   path, and the fall-through returns of ORIG_X are missing from this view.
   Code left byte-identical; comments only.
   Per the preceding comment: recognize PIC+GOTOFF / GOT forms and turn
   them back into a direct symbol reference for smaller debug output.  */
5207 i386_simplify_dwarf_addr (orig_x
)
/* (const (unspec [sym] 15)) -- @GOTPCREL -- unwraps to the symbol.  */
5214 if (GET_CODE (x
) != CONST
5215 || GET_CODE (XEXP (x
, 0)) != UNSPEC
5216 || XINT (XEXP (x
, 0), 1) != 15)
5218 return XVECEXP (XEXP (x
, 0), 0, 0);
/* Otherwise expect (plus pic_reg (const ...)).  */
5221 if (GET_CODE (x
) != PLUS
5222 || GET_CODE (XEXP (x
, 0)) != REG
5223 || GET_CODE (XEXP (x
, 1)) != CONST
)
5226 x
= XEXP (XEXP (x
, 1), 0);
/* Unspec 6 (@GOT) or 7 (@GOTOFF): return the wrapped symbol.  */
5227 if (GET_CODE (x
) == UNSPEC
5228 && (XINT (x
, 1) == 6
5229 || XINT (x
, 1) == 7))
5230 return XVECEXP (x
, 0, 0);
/* Same, plus a constant offset: return sym + offset.  */
5232 if (GET_CODE (x
) == PLUS
5233 && GET_CODE (XEXP (x
, 0)) == UNSPEC
5234 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5235 && (XINT (XEXP (x
, 0), 1) == 6
5236 || XINT (XEXP (x
, 0), 1) == 7))
5237 return gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
/* NOTE(review): mangled extraction -- the switch statement and most case
   labels (the rtx_code values each suffix belongs to), the remaining
   suffix assignments, and abort paths are missing from this view.
   Code left byte-identical; comments only.
   Emits the condition-code suffix for CODE in MODE to FILE; REVERSE flips
   the condition, FP selects the fcmov-style spellings (see the "nbe"
   comment below).  */
5243 put_condition_code (code
, mode
, reverse
, fp
, file
)
5245 enum machine_mode mode
;
/* FP compare modes: reduce the fp comparison to an integer-style code;
   comparisons needing a bypass/second compare cannot be expressed as a
   single suffix.  */
5251 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
5253 enum rtx_code second_code
, bypass_code
;
5254 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
5255 if (bypass_code
!= NIL
|| second_code
!= NIL
)
5257 code
= ix86_fp_compare_code_to_integer (code
);
5261 code
= reverse_condition (code
);
5272 if (mode
!= CCmode
&& mode
!= CCNOmode
&& mode
!= CCGCmode
)
5277 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
5278 Those same assemblers have the same but opposite losage on cmov. */
5281 suffix
= fp
? "nbe" : "a";
5284 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
5286 else if (mode
== CCmode
|| mode
== CCGCmode
)
5297 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
5299 else if (mode
== CCmode
|| mode
== CCGCmode
)
5308 suffix
= fp
? "nb" : "ae";
5311 if (mode
!= CCmode
&& mode
!= CCGCmode
&& mode
!= CCNOmode
)
5321 suffix
= fp
? "u" : "p";
5324 suffix
= fp
? "nu" : "np";
5329 fputs (suffix
, file
);
/* NOTE(review): mangled extraction -- the abort() for pseudo registers,
   the '%' prefix emission, the assignments of CODE for each size letter,
   and the switch selecting the r8..r15 suffix variants are missing from
   this view.  Code left byte-identical; comments only.
   Prints the assembler name of hard register X to FILE; CODE is a size
   override letter ('b','w','k','q','y','h') or becomes the mode size.  */
5333 print_reg (x
, code
, file
)
/* Internal-only registers must never reach the assembler output.  */
5338 if (REGNO (x
) == ARG_POINTER_REGNUM
5339 || REGNO (x
) == FRAME_POINTER_REGNUM
5340 || REGNO (x
) == FLAGS_REG
5341 || REGNO (x
) == FPSR_REG
)
5344 if (ASSEMBLER_DIALECT
== 0 || USER_LABEL_PREFIX
[0] == 0)
/* Map the override letter (or MMX-ness) to an operand size in CODE.  */
5347 if (code
== 'w' || MMX_REG_P (x
))
5349 else if (code
== 'b')
5351 else if (code
== 'k')
5353 else if (code
== 'q')
5355 else if (code
== 'y')
5357 else if (code
== 'h')
5360 code
= GET_MODE_SIZE (GET_MODE (x
));
5362 /* Irritatingly, AMD extended registers use different naming convention
5363 from the normal registers. */
5364 if (REX_INT_REG_P (x
))
5371 error ("Extended registers have no high halves\n");
5374 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
5377 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
5380 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
5383 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
5386 error ("Unsupported operand size for extended register.\n");
/* x87 stack top may be named st(0) explicitly (code 'y' presumably).  */
5394 if (STACK_TOP_P (x
))
5396 fputs ("st(0)", file
);
/* Size 4/8 integer regs get the 'e'/'r' prefix (eax / rax).  */
5403 if (! ANY_FP_REG_P (x
))
5404 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
5408 fputs (hi_reg_name
[REGNO (x
)], file
);
5411 fputs (qi_reg_name
[REGNO (x
)], file
);
5414 fputs (qi_high_reg_name
[REGNO (x
)], file
);
5422 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5423 C -- print opcode suffix for set/cmov insn.
5424 c -- like C, but print reversed condition
5425 F,f -- likewise, but for floating-point.
5426 R -- print the prefix for register names.
5427 z -- print the opcode suffix for the size of the current operand.
5428 * -- print a star (in certain assembler syntax)
5429 A -- print an absolute memory reference.
5430 w -- print the operand as if it's a "word" (HImode) even if it isn't.
5431 s -- print a shift double count, followed by the assemblers argument
5433 b -- print the QImode name of the register for the indicated operand.
5434 %b0 would print %al if operands[0] is reg 0.
5435 w -- likewise, print the HImode name of the register.
5436 k -- likewise, print the SImode name of the register.
5437 q -- likewise, print the DImode name of the register.
5438 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5439 y -- print "st(0)" instead of "st" as a register.
5440 D -- print condition for SSE cmp instruction.
5441 P -- if PIC, print an @PLT suffix.
5442 X -- don't print any sort of PIC '@' suffix for a symbol.
/* NOTE(review): mangled extraction -- the dispatch switch on CODE, many
   case labels, the putc/fputs bodies of the one-character dialect cases,
   and several returns are missing from this view.  Code left
   byte-identical; comments only.
   Main operand printer (PRINT_OPERAND); the meaning of each CODE letter
   is documented in the comment block immediately above this function.  */
5446 print_operand (file
, x
, code
)
/* '*' -- star only in AT&T syntax.  */
5456 if (ASSEMBLER_DIALECT
== 0)
/* 'A' -- absolute memory reference; Intel syntax differs for REGs.  */
5461 if (ASSEMBLER_DIALECT
== 0)
5463 else if (ASSEMBLER_DIALECT
== 1)
5465 /* Intel syntax. For absolute addresses, registers should not
5466 be surrounded by braces. */
5467 if (GET_CODE (x
) != REG
)
5470 PRINT_OPERAND (file
, x
, 0);
5476 PRINT_OPERAND (file
, x
, 0);
/* Size-suffix letters: only AT&T syntax emits them.  */
5481 if (ASSEMBLER_DIALECT
== 0)
5486 if (ASSEMBLER_DIALECT
== 0)
5491 if (ASSEMBLER_DIALECT
== 0)
5496 if (ASSEMBLER_DIALECT
== 0)
5501 if (ASSEMBLER_DIALECT
== 0)
5506 if (ASSEMBLER_DIALECT
== 0)
5511 /* 387 opcodes don't get size suffixes if the operands are
5514 if (STACK_REG_P (x
))
5517 /* this is the size of op from size of operand */
5518 switch (GET_MODE_SIZE (GET_MODE (x
)))
5521 #ifdef HAVE_GAS_FILDS_FISTS
5527 if (GET_MODE (x
) == SFmode
)
5542 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
5544 #ifdef GAS_MNEMONICS
/* 's' -- shift double count (omitted by some assemblers when %cl).  */
5570 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
5572 PRINT_OPERAND (file
, x
, 0);
5578 /* Little bit of braindamage here. The SSE compare instructions
5579 use completely different names for the comparisons than the
5580 fp conditional moves do. */
5581 switch (GET_CODE (x
))
5596 fputs ("unord", file
);
5600 fputs ("neq", file
);
5604 fputs ("nlt", file
);
5608 fputs ("nle", file
);
5611 fputs ("ord", file
);
/* 'C'/'F' -- condition suffix (int/fp); 'c'/'f' -- reversed.  */
5619 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
5622 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
5625 /* Like above, but reverse condition */
5627 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
5630 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
/* Branch prediction hint prefixes, driven by the REG_BR_PROB note.  */
5636 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
5639 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
5642 int pred_val
= INTVAL (XEXP (x
, 0));
5644 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
5645 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
5647 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
5648 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
5650 /* Emit hints only in the case default branch prediction
5651 heuristics would fail. */
5652 if (taken
!= cputaken
)
5654 /* We use 3e (DS) prefix for taken branches and
5655 2e (CS) prefix for not taken branches. */
5657 fputs ("ds ; ", file
);
5659 fputs ("cs ; ", file
);
5668 sprintf (str
, "invalid operand code `%c'", code
);
5669 output_operand_lossage (str
);
/* After the code-letter switch: print the operand itself.  */
5674 if (GET_CODE (x
) == REG
)
5676 PRINT_REG (x
, code
, file
);
5679 else if (GET_CODE (x
) == MEM
)
5681 /* No `byte ptr' prefix for call instructions. */
5682 if (ASSEMBLER_DIALECT
!= 0 && code
!= 'X' && code
!= 'P')
5685 switch (GET_MODE_SIZE (GET_MODE (x
)))
5687 case 1: size
= "BYTE"; break;
5688 case 2: size
= "WORD"; break;
5689 case 4: size
= "DWORD"; break;
5690 case 8: size
= "QWORD"; break;
5691 case 12: size
= "XWORD"; break;
5692 case 16: size
= "XMMWORD"; break;
5697 /* Check for explicit size override (codes 'b', 'w' and 'k') */
5700 else if (code
== 'w')
5702 else if (code
== 'k')
5706 fputs (" PTR ", file
);
5710 if (flag_pic
&& CONSTANT_ADDRESS_P (x
))
5711 output_pic_addr_const (file
, x
, code
);
5712 /* Avoid (%rip) for call operands. */
5713 else if (CONSTANT_ADDRESS_P (x
) && code
=='P'
5714 && GET_CODE (x
) != CONST_INT
)
5715 output_addr_const (file
, x
);
5720 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
5725 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
5726 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
5728 if (ASSEMBLER_DIALECT
== 0)
5730 fprintf (file
, "0x%lx", l
);
5733 /* These float cases don't actually occur as immediate operands. */
5734 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
5739 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
5740 REAL_VALUE_TO_DECIMAL (r
, "%.22e", dstr
);
5741 fprintf (file
, "%s", dstr
);
5744 else if (GET_CODE (x
) == CONST_DOUBLE
5745 && (GET_MODE (x
) == XFmode
|| GET_MODE (x
) == TFmode
))
5750 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
5751 REAL_VALUE_TO_DECIMAL (r
, "%.22e", dstr
);
5752 fprintf (file
, "%s", dstr
);
/* Fallback: immediates get '$' in AT&T, symbols get OFFSET FLAT: in
   Intel syntax.  */
5758 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
5760 if (ASSEMBLER_DIALECT
== 0)
5763 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
5764 || GET_CODE (x
) == LABEL_REF
)
5766 if (ASSEMBLER_DIALECT
== 0)
5769 fputs ("OFFSET FLAT:", file
);
5772 if (GET_CODE (x
) == CONST_INT
)
5773 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
5775 output_pic_addr_const (file
, x
, code
);
5777 output_addr_const (file
, x
);
5781 /* Print a memory operand whose address is ADDR. */
/* NOTE(review): mangled extraction -- the scale declaration, the abort()
   after a failed decompose, the base/disp assignments from PARTS, the
   dialect branch structure, and the '+'/'-' sign emission around the
   Intel-syntax offset prints are missing from this view.  Code left
   byte-identical; comments only.
   Prints the memory address ADDR to FILE, in AT&T disp(base,index,scale)
   or Intel [base+index*scale+disp] form.  */
5784 print_operand_address (file
, addr
)
5788 struct ix86_address parts
;
5789 rtx base
, index
, disp
;
5792 if (! ix86_decompose_address (addr
, &parts
))
5796 index
= parts
.index
;
5798 scale
= parts
.scale
;
5800 if (!base
&& !index
)
5802 /* Displacement only requires special attention. */
5804 if (GET_CODE (disp
) == CONST_INT
)
5806 if (ASSEMBLER_DIALECT
!= 0)
5808 if (USER_LABEL_PREFIX
[0] == 0)
5810 fputs ("ds:", file
);
5812 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (addr
));
5815 output_pic_addr_const (file
, addr
, 0);
5817 output_addr_const (file
, addr
);
5819 /* Use one byte shorter RIP relative addressing for 64bit mode. */
5820 if (GET_CODE (disp
) != CONST_INT
&& TARGET_64BIT
)
5821 fputs ("(%rip)", file
);
/* AT&T syntax: disp(base,index,scale).  */
5825 if (ASSEMBLER_DIALECT
== 0)
5830 output_pic_addr_const (file
, disp
, 0);
5831 else if (GET_CODE (disp
) == LABEL_REF
)
5832 output_asm_label (disp
);
5834 output_addr_const (file
, disp
);
5839 PRINT_REG (base
, 0, file
);
5843 PRINT_REG (index
, 0, file
);
5845 fprintf (file
, ",%d", scale
);
/* Intel syntax: [base+index*scale+disp].  */
5851 rtx offset
= NULL_RTX
;
5855 /* Pull out the offset of a symbol; print any symbol itself. */
5856 if (GET_CODE (disp
) == CONST
5857 && GET_CODE (XEXP (disp
, 0)) == PLUS
5858 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
5860 offset
= XEXP (XEXP (disp
, 0), 1);
5861 disp
= gen_rtx_CONST (VOIDmode
,
5862 XEXP (XEXP (disp
, 0), 0));
5866 output_pic_addr_const (file
, disp
, 0);
5867 else if (GET_CODE (disp
) == LABEL_REF
)
5868 output_asm_label (disp
);
5869 else if (GET_CODE (disp
) == CONST_INT
)
5872 output_addr_const (file
, disp
);
5878 PRINT_REG (base
, 0, file
);
5881 if (INTVAL (offset
) >= 0)
5883 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
5887 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
5894 PRINT_REG (index
, 0, file
);
5896 fprintf (file
, "*%d", scale
);
5903 /* Split one or more DImode RTL references into pairs of SImode
5904 references. The RTL can be REG, offsettable MEM, integer constant, or
5905 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
5906 split and "num" is its length. lo_half and hi_half are output arrays
5907 that parallel "operands". */
/* NOTE(review): mangled extraction -- the return type, the declarations
   of OPERANDS/NUM, the loop header iterating NUM down (OP is indexed by
   NUM each pass), the TARGET_64BIT/abort guards, and braces are missing
   from this view.  Code left byte-identical; comments only.
   Per the preceding comment: split each DImode ref in OPERANDS (length
   NUM) into SImode halves in LO_HALF/HI_HALF.  */
5910 split_di (operands
, num
, lo_half
, hi_half
)
5913 rtx lo_half
[], hi_half
[];
5917 rtx op
= operands
[num
];
/* Constants are split arithmetically.  */
5918 if (CONSTANT_P (op
))
5919 split_double (op
, &lo_half
[num
], &hi_half
[num
]);
/* Before reload, use subreg-style lowpart/highpart.  */
5920 else if (! reload_completed
)
5922 lo_half
[num
] = gen_lowpart (SImode
, op
);
5923 hi_half
[num
] = gen_highpart (SImode
, op
);
/* After reload, a hard reg pair is (regno, regno+1).  */
5925 else if (GET_CODE (op
) == REG
)
5929 lo_half
[num
] = gen_rtx_REG (SImode
, REGNO (op
));
5930 hi_half
[num
] = gen_rtx_REG (SImode
, REGNO (op
) + 1);
/* Offsettable memory splits at byte offsets 0 and 4.  */
5932 else if (offsettable_memref_p (op
))
5934 lo_half
[num
] = adjust_address (op
, SImode
, 0);
5935 hi_half
[num
] = adjust_address (op
, SImode
, 4);
5942 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
5943 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
5944 is the expression of the binary operation. The output may either be
5945 emitted here, or returned to the caller, like all output_* functions.
5947 There is no guarantee that the operands are the same mode, as they
5948 might be within FLOAT or FLOAT_EXTEND expressions. */
5950 #ifndef SYSV386_COMPAT
5951 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
5952 wants to fix the assemblers because that causes incompatibility
5953 with gcc. No-one wants to fix gcc because that causes
5954 incompatibility with assemblers... You can use the option of
5955 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
5956 #define SYSV386_COMPAT 1
/* NOTE(review): mangled extraction -- the declarations of P, the initial
   strcpy of the mnemonic into BUF for each opcode case, the abort()s,
   several case labels and the final strcat/return are missing from this
   view.  Code left byte-identical; comments only.
   Per the preceding comment: output the 387 (or SSE, when IS_SSE) binary
   op of operands[3] (PLUS/MINUS/MULT/DIV); result may be emitted here or
   returned as a template.  */
5960 output_387_binary_op (insn
, operands
)
5964 static char buf
[30];
5967 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]) | SSE_REG_P (operands
[2]);
5969 #ifdef ENABLE_CHECKING
5970 /* Even if we do not want to check the inputs, this documents input
5971 constraints. Which helps in understanding the following code. */
5972 if (STACK_REG_P (operands
[0])
5973 && ((REG_P (operands
[1])
5974 && REGNO (operands
[0]) == REGNO (operands
[1])
5975 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
5976 || (REG_P (operands
[2])
5977 && REGNO (operands
[0]) == REGNO (operands
[2])
5978 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
5979 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
/* Pick the base mnemonic; integer-mode operands select the fi* forms.  */
5985 switch (GET_CODE (operands
[3]))
5988 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
5989 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
5997 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
5998 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6006 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6007 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6015 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6016 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
/* SSE: append the ss/sd scalar suffix and operands, done.  */
6030 if (GET_MODE (operands
[0]) == SFmode
)
6031 strcat (buf
, "ss\t{%2, %0|%0, %2}");
6033 strcat (buf
, "sd\t{%2, %0|%0, %2}");
6038 switch (GET_CODE (operands
[3]))
/* Commutative ops: normalize so operands[0] == operands[1].  */
6042 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
6044 rtx temp
= operands
[2];
6045 operands
[2] = operands
[1];
6049 /* know operands[0] == operands[1]. */
6051 if (GET_CODE (operands
[2]) == MEM
)
6057 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
6059 if (STACK_TOP_P (operands
[0]))
6060 /* How is it that we are storing to a dead operand[2]?
6061 Well, presumably operands[1] is dead too. We can't
6062 store the result to st(0) as st(0) gets popped on this
6063 instruction. Instead store to operands[2] (which I
6064 think has to be st(1)). st(1) will be popped later.
6065 gcc <= 2.8.1 didn't have this check and generated
6066 assembly code that the Unixware assembler rejected. */
6067 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6069 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6073 if (STACK_TOP_P (operands
[0]))
6074 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6076 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative MINUS/DIV: the r-variant juggling below.  */
6081 if (GET_CODE (operands
[1]) == MEM
)
6087 if (GET_CODE (operands
[2]) == MEM
)
6093 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
6096 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6097 derived assemblers, confusingly reverse the direction of
6098 the operation for fsub{r} and fdiv{r} when the
6099 destination register is not st(0). The Intel assembler
6100 doesn't have this brain damage. Read !SYSV386_COMPAT to
6101 figure out what the hardware really does. */
6102 if (STACK_TOP_P (operands
[0]))
6103 p
= "{p\t%0, %2|rp\t%2, %0}";
6105 p
= "{rp\t%2, %0|p\t%0, %2}";
6107 if (STACK_TOP_P (operands
[0]))
6108 /* As above for fmul/fadd, we can't store to st(0). */
6109 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6111 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6116 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
6119 if (STACK_TOP_P (operands
[0]))
6120 p
= "{rp\t%0, %1|p\t%1, %0}";
6122 p
= "{p\t%1, %0|rp\t%0, %1}";
6124 if (STACK_TOP_P (operands
[0]))
6125 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6127 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6132 if (STACK_TOP_P (operands
[0]))
6134 if (STACK_TOP_P (operands
[1]))
6135 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6137 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
6140 else if (STACK_TOP_P (operands
[1]))
6143 p
= "{\t%1, %0|r\t%0, %1}";
6145 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
6151 p
= "{r\t%2, %0|\t%0, %2}";
6153 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6166 /* Output code to initialize control word copies used by
6167 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
6168 is set to control word rounding downwards. */
/* NOTE(review): mangled extraction -- the second half of the
   TARGET_PARTIAL_REG_STALL condition (line 6178) and the else keyword
   before the iorhi3 branch are missing from this view.  Code left
   byte-identical; comments only.
   Per the preceding comment: store the current x87 control word into
   NORMAL and a round-toward-zero/down variant (bits 10-11 = 0xc00 set)
   into ROUND_DOWN, for use by the trunc?f?i patterns.  */
6170 emit_i387_cw_initialization (normal
, round_down
)
6171 rtx normal
, round_down
;
6173 rtx reg
= gen_reg_rtx (HImode
);
6175 emit_insn (gen_x86_fnstcw_1 (normal
));
6176 emit_move_insn (reg
, normal
);
/* Set the RC bits: via insv of 0xc into the high byte when partial
   register stalls are cheap, otherwise with a full-width OR of 0xc00.  */
6177 if (!TARGET_PARTIAL_REG_STALL
&& !optimize_size
6179 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
6181 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0xc00)));
6182 emit_move_insn (round_down
, reg
);
6185 /* Output code for INSN to convert a float to a signed int. OPERANDS
6186 are the insn operands. The output may be [HSD]Imode and the input
6187 operand may be [SDX]Fmode. */
/* NOTE(review): mangled extraction -- the return type, two abort() bodies
   after the sanity checks, and the final return of "" (or equivalent) are
   missing from this view.  Code left byte-identical; comments only.
   Per the preceding comment: output code for INSN converting a float
   ([SDX]Fmode in operands[1], expected at st(0)) to a signed int
   ([HSD]Imode memory in operands[0]); operands[2]/[3] hold the saved and
   round-down control words set up by emit_i387_cw_initialization.  */
6190 output_fix_trunc (insn
, operands
)
6194 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
6195 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
6197 /* Jump through a hoop or two for DImode, since the hardware has no
6198 non-popping instruction. We used to do this a different way, but
6199 that was somewhat fragile and broke with post-reload splitters. */
6200 if (dimode_p
&& !stack_top_dies
)
6201 output_asm_insn ("fld\t%y1", operands
);
/* Sanity: input must be at st(0) and output must be memory.  */
6203 if (!STACK_TOP_P (operands
[1]))
6206 if (GET_CODE (operands
[0]) != MEM
)
/* Switch to truncating rounding, store (popping when the value dies or
   we duplicated it above), then restore the control word.  */
6209 output_asm_insn ("fldcw\t%3", operands
);
6210 if (stack_top_dies
|| dimode_p
)
6211 output_asm_insn ("fistp%z0\t%0", operands
);
6213 output_asm_insn ("fist%z0\t%0", operands
);
6214 output_asm_insn ("fldcw\t%2", operands
);
6219 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6220 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6221 when fucom should be used. */
/* NOTE(review): mangled extraction -- the declarations of STACK_TOP_DIES,
   the is_sse/TARGET_SSE guards, several if/else and abort lines, parts of
   the ALT template table (indices with NULLs), and the final
   output_asm_insn of alt[mask] are missing from this view.  Code left
   byte-identical; comments only.
   Per the preceding comment: output the fp compare for INSN; EFLAGS_P is
   1 for fcomi-style, 2 for fnstsw-style; UNORDERED_P selects fucom.  */
6224 output_fp_compare (insn
, operands
, eflags_p
, unordered_p
)
6227 int eflags_p
, unordered_p
;
6230 rtx cmp_op0
= operands
[0];
6231 rtx cmp_op1
= operands
[1];
6232 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]);
6237 cmp_op1
= operands
[2];
/* SSE compares.  NOTE(review): the trailing "%y" in the comiss/comisd
   Intel-syntax halves has no operand digit -- looks like a typo for
   "%y1" (cf. the ucomiss/ucomisd templates two lines above); verify
   against the unmangled source before relying on these templates.  */
6241 if (GET_MODE (operands
[0]) == SFmode
)
6243 return "ucomiss\t{%1, %0|%0, %1}";
6245 return "comiss\t{%1, %0|%0, %y}";
6248 return "ucomisd\t{%1, %0|%0, %1}";
6250 return "comisd\t{%1, %0|%0, %y}";
6253 if (! STACK_TOP_P (cmp_op0
))
6256 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
6258 if (STACK_REG_P (cmp_op1
)
6260 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
6261 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
6263 /* If both the top of the 387 stack dies, and the other operand
6264 is also a stack register that dies, then this must be a
6265 `fcompp' float compare */
6269 /* There is no double popping fcomi variant. Fortunately,
6270 eflags is immune from the fstp's cc clobbering. */
6272 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
6274 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
6282 return "fucompp\n\tfnstsw\t%0";
6284 return "fcompp\n\tfnstsw\t%0";
6297 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
6299 static const char * const alt
[24] =
6311 "fcomi\t{%y1, %0|%0, %y1}",
6312 "fcomip\t{%y1, %0|%0, %y1}",
6313 "fucomi\t{%y1, %0|%0, %y1}",
6314 "fucomip\t{%y1, %0|%0, %y1}",
6321 "fcom%z2\t%y2\n\tfnstsw\t%0",
6322 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6323 "fucom%z2\t%y2\n\tfnstsw\t%0",
6324 "fucomp%z2\t%y2\n\tfnstsw\t%0",
6326 "ficom%z2\t%y2\n\tfnstsw\t%0",
6327 "ficomp%z2\t%y2\n\tfnstsw\t%0",
/* Build the 4-bit index into ALT per the encoding comment above.  */
6335 mask
= eflags_p
<< 3;
6336 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
6337 mask
|= unordered_p
<< 1;
6338 mask
|= stack_top_dies
;
6350 /* Output assembler code to FILE to initialize basic-block profiling.
6352 If profile_block_flag == 2
6354 Output code to call the subroutine `__bb_init_trace_func'
6355 and pass two parameters to it. The first parameter is
6356 the address of a block allocated in the object module.
6357 The second parameter is the number of the first basic block
6360 The name of the block is a local symbol made with this statement:
6362 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
6364 Of course, since you are writing the definition of
6365 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
6366 can take a short cut in the definition of this macro and use the
6367 name that you know will result.
6369 The number of the first basic block of the function is
6370 passed to the macro in BLOCK_OR_LABEL.
6372 If described in a virtual assembler language the code to be
6376 parameter2 <- BLOCK_OR_LABEL
6377 call __bb_init_trace_func
6379 else if profile_block_flag != 0
6381 Output code to call the subroutine `__bb_init_func'
6382 and pass one single parameter to it, which is the same
6383 as the first parameter to `__bb_init_trace_func'.
6385 The first word of this parameter is a flag which will be nonzero if
6386 the object module has already been initialized. So test this word
6387 first, and do not call `__bb_init_func' if the flag is nonzero.
6388 Note: When profile_block_flag == 2 the test need not be done
6389 but `__bb_init_trace_func' *must* be called.
6391 BLOCK_OR_LABEL may be used to generate a label number as a
6392 branch destination in case `__bb_init_func' will not be called.
6394 If described in a virtual assembler language the code to be
/* NOTE(review): mangled extraction -- the xops[] declaration, the
   flag_pic branch structure around the push-vs-lea pairs, case labels,
   num_func increment and abort/default are missing from this view.
   Code left byte-identical; comments only.
   Per the large preceding comment: emit initialization code for
   basic-block profiling -- call __bb_init_trace_func (flag == 2) or,
   once per object file, __bb_init_func.  */
6405 ix86_output_function_block_profiler (file
, block_or_label
)
6409 static int num_func
= 0;
6411 char block_table
[80], false_label
[80];
6413 ASM_GENERATE_INTERNAL_LABEL (block_table
, "LPBX", 0);
6415 xops
[1] = gen_rtx_SYMBOL_REF (VOIDmode
, block_table
);
6416 xops
[5] = stack_pointer_rtx
;
6417 xops
[7] = gen_rtx_REG (Pmode
, 0); /* eax */
6419 CONSTANT_POOL_ADDRESS_P (xops
[1]) = TRUE
;
6421 switch (profile_block_flag
)
/* flag == 2: push BLOCK_OR_LABEL and the table address, call
   __bb_init_trace_func, pop the 8 bytes of arguments.  */
6424 xops
[2] = GEN_INT (block_or_label
);
6425 xops
[3] = gen_rtx_MEM (Pmode
,
6426 gen_rtx_SYMBOL_REF (VOIDmode
, "__bb_init_trace_func"));
6427 xops
[6] = GEN_INT (8);
6429 output_asm_insn ("push{l}\t%2", xops
);
6431 output_asm_insn ("push{l}\t%1", xops
);
/* PIC path (presumably): materialize the address via lea first.  */
6434 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops
);
6435 output_asm_insn ("push{l}\t%7", xops
);
6437 output_asm_insn ("call\t%P3", xops
);
6438 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops
);
/* Other nonzero flag: test the table's first word, skip the call to
   __bb_init_func if already initialized.  */
6442 ASM_GENERATE_INTERNAL_LABEL (false_label
, "LPBZ", num_func
);
6444 xops
[0] = const0_rtx
;
6445 xops
[2] = gen_rtx_MEM (Pmode
,
6446 gen_rtx_SYMBOL_REF (VOIDmode
, false_label
));
6447 xops
[3] = gen_rtx_MEM (Pmode
,
6448 gen_rtx_SYMBOL_REF (VOIDmode
, "__bb_init_func"));
6449 xops
[4] = gen_rtx_MEM (Pmode
, xops
[1]);
6450 xops
[6] = GEN_INT (4);
6452 CONSTANT_POOL_ADDRESS_P (xops
[2]) = TRUE
;
6454 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops
);
6455 output_asm_insn ("jne\t%2", xops
);
6458 output_asm_insn ("push{l}\t%1", xops
);
/* NOTE(review): Intel-syntax half says %a2 while the AT&T half says %a1
   (and the flag==2 path above uses %a1 on both sides) -- %a2 here is the
   "jne" target label, not the block table; this looks like a typo for
   %a1.  Verify against the unmangled source before changing.  */
6461 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops
);
6462 output_asm_insn ("push{l}\t%7", xops
);
6464 output_asm_insn ("call\t%P3", xops
);
6465 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops
);
6466 ASM_OUTPUT_INTERNAL_LABEL (file
, "LPBZ", num_func
);
6472 /* Output assembler code to FILE to increment a counter associated
6473 with basic block number BLOCKNO.
6475 If profile_block_flag == 2
6477 Output code to initialize the global structure `__bb' and
6478 call the function `__bb_trace_func' which will increment the
6481 `__bb' consists of two words. In the first word the number
6482 of the basic block has to be stored. In the second word
6483 the address of a block allocated in the object module
6486 The basic block number is given by BLOCKNO.
6488 The address of the block is given by the label created with
6490 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
6492 by FUNCTION_BLOCK_PROFILER.
6494 Of course, since you are writing the definition of
6495 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
6496 can take a short cut in the definition of this macro and use the
6497 name that you know will result.
6499 If described in a virtual assembler language the code to be
6502 move BLOCKNO -> (__bb)
6503 move LPBX0 -> (__bb+4)
6504 call __bb_trace_func
6506 Note that function `__bb_trace_func' must not change the
6508 machine state, especially the flag register. To guarantee
6508 this, you must output code to save and restore registers
6509 either in this macro or in the macros MACHINE_STATE_SAVE
6510 and MACHINE_STATE_RESTORE. The last two macros will be
6511 used in the function `__bb_trace_func', so you must make
6512 sure that the function prologue does not change any
6513 register prior to saving it with MACHINE_STATE_SAVE.
6515 else if profile_block_flag != 0
6517 Output code to increment the counter directly.
6518 Basic blocks are numbered separately from zero within each
6519 compiled object module. The count associated with block number
6520 BLOCKNO is at index BLOCKNO in an array of words; the name of
6521 this array is a local symbol made with this statement:
6523 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
6525 Of course, since you are writing the definition of
6526 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
6527 can take a short cut in the definition of this macro and use the
6528 name that you know will result.
6530 If described in a virtual assembler language the code to be
6533 inc (LPBX2+4*BLOCKNO)
6537 ix86_output_block_profiler (file
, blockno
)
6538 FILE *file ATTRIBUTE_UNUSED
;
6541 rtx xops
[8], cnt_rtx
;
6543 char *block_table
= counts
;
6545 switch (profile_block_flag
)
6548 ASM_GENERATE_INTERNAL_LABEL (block_table
, "LPBX", 0);
6550 xops
[1] = gen_rtx_SYMBOL_REF (VOIDmode
, block_table
);
6551 xops
[2] = GEN_INT (blockno
);
6552 xops
[3] = gen_rtx_MEM (Pmode
,
6553 gen_rtx_SYMBOL_REF (VOIDmode
, "__bb_trace_func"));
6554 xops
[4] = gen_rtx_SYMBOL_REF (VOIDmode
, "__bb");
6555 xops
[5] = plus_constant (xops
[4], 4);
6556 xops
[0] = gen_rtx_MEM (SImode
, xops
[4]);
6557 xops
[6] = gen_rtx_MEM (SImode
, xops
[5]);
6559 CONSTANT_POOL_ADDRESS_P (xops
[1]) = TRUE
;
6561 output_asm_insn ("pushf", xops
);
6562 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
6565 xops
[7] = gen_rtx_REG (Pmode
, 0); /* eax */
6566 output_asm_insn ("push{l}\t%7", xops
);
6567 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops
);
6568 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops
);
6569 output_asm_insn ("pop{l}\t%7", xops
);
6572 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops
);
6573 output_asm_insn ("call\t%P3", xops
);
6574 output_asm_insn ("popf", xops
);
6579 ASM_GENERATE_INTERNAL_LABEL (counts
, "LPBX", 2);
6580 cnt_rtx
= gen_rtx_SYMBOL_REF (VOIDmode
, counts
);
6581 SYMBOL_REF_FLAG (cnt_rtx
) = TRUE
;
6584 cnt_rtx
= plus_constant (cnt_rtx
, blockno
*4);
6587 cnt_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, cnt_rtx
);
6589 xops
[0] = gen_rtx_MEM (SImode
, cnt_rtx
);
6590 output_asm_insn ("inc{l}\t%0", xops
);
6597 ix86_expand_move (mode
, operands
)
6598 enum machine_mode mode
;
6601 int strict
= (reload_in_progress
|| reload_completed
);
6604 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (operands
[1], Pmode
))
6606 /* Emit insns to move operands[1] into operands[0]. */
6608 if (GET_CODE (operands
[0]) == MEM
)
6609 operands
[1] = force_reg (Pmode
, operands
[1]);
6612 rtx temp
= operands
[0];
6613 if (GET_CODE (temp
) != REG
)
6614 temp
= gen_reg_rtx (Pmode
);
6615 temp
= legitimize_pic_address (operands
[1], temp
);
6616 if (temp
== operands
[0])
6623 if (GET_CODE (operands
[0]) == MEM
6624 && (GET_MODE (operands
[0]) == QImode
6625 || !push_operand (operands
[0], mode
))
6626 && GET_CODE (operands
[1]) == MEM
)
6627 operands
[1] = force_reg (mode
, operands
[1]);
6629 if (push_operand (operands
[0], mode
)
6630 && ! general_no_elim_operand (operands
[1], mode
))
6631 operands
[1] = copy_to_mode_reg (mode
, operands
[1]);
6633 if (FLOAT_MODE_P (mode
))
6635 /* If we are loading a floating point constant to a register,
6636 force the value to memory now, since we'll get better code
6637 out the back end. */
6641 else if (GET_CODE (operands
[1]) == CONST_DOUBLE
6642 && register_operand (operands
[0], mode
))
6643 operands
[1] = validize_mem (force_const_mem (mode
, operands
[1]));
6647 insn
= gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]);
6652 /* Attempt to expand a binary operator. Make the expansion closer to the
6653 actual machine, then just general_operand, which will allow 3 separate
6654 memory references (one output, two input) in a single insn. */
6657 ix86_expand_binary_operator (code
, mode
, operands
)
6659 enum machine_mode mode
;
6662 int matching_memory
;
6663 rtx src1
, src2
, dst
, op
, clob
;
6669 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6670 if (GET_RTX_CLASS (code
) == 'c'
6671 && (rtx_equal_p (dst
, src2
)
6672 || immediate_operand (src1
, mode
)))
6679 /* If the destination is memory, and we do not have matching source
6680 operands, do things in registers. */
6681 matching_memory
= 0;
6682 if (GET_CODE (dst
) == MEM
)
6684 if (rtx_equal_p (dst
, src1
))
6685 matching_memory
= 1;
6686 else if (GET_RTX_CLASS (code
) == 'c'
6687 && rtx_equal_p (dst
, src2
))
6688 matching_memory
= 2;
6690 dst
= gen_reg_rtx (mode
);
6693 /* Both source operands cannot be in memory. */
6694 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
6696 if (matching_memory
!= 2)
6697 src2
= force_reg (mode
, src2
);
6699 src1
= force_reg (mode
, src1
);
6702 /* If the operation is not commutable, source 1 cannot be a constant
6703 or non-matching memory. */
6704 if ((CONSTANT_P (src1
)
6705 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
6706 && GET_RTX_CLASS (code
) != 'c')
6707 src1
= force_reg (mode
, src1
);
6709 /* If optimizing, copy to regs to improve CSE */
6710 if (optimize
&& ! no_new_pseudos
)
6712 if (GET_CODE (dst
) == MEM
)
6713 dst
= gen_reg_rtx (mode
);
6714 if (GET_CODE (src1
) == MEM
)
6715 src1
= force_reg (mode
, src1
);
6716 if (GET_CODE (src2
) == MEM
)
6717 src2
= force_reg (mode
, src2
);
6720 /* Emit the instruction. */
6722 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
6723 if (reload_in_progress
)
6725 /* Reload doesn't know about the flags register, and doesn't know that
6726 it doesn't want to clobber it. We can only do this with PLUS. */
6733 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
6734 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
6737 /* Fix up the destination if needed. */
6738 if (dst
!= operands
[0])
6739 emit_move_insn (operands
[0], dst
);
6742 /* Return TRUE or FALSE depending on whether the binary operator meets the
6743 appropriate constraints. */
6746 ix86_binary_operator_ok (code
, mode
, operands
)
6748 enum machine_mode mode ATTRIBUTE_UNUSED
;
6751 /* Both source operands cannot be in memory. */
6752 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
6754 /* If the operation is not commutable, source 1 cannot be a constant. */
6755 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != 'c')
6757 /* If the destination is memory, we must have a matching source operand. */
6758 if (GET_CODE (operands
[0]) == MEM
6759 && ! (rtx_equal_p (operands
[0], operands
[1])
6760 || (GET_RTX_CLASS (code
) == 'c'
6761 && rtx_equal_p (operands
[0], operands
[2]))))
6763 /* If the operation is not commutable and the source 1 is memory, we must
6764 have a matching destionation. */
6765 if (GET_CODE (operands
[1]) == MEM
6766 && GET_RTX_CLASS (code
) != 'c'
6767 && ! rtx_equal_p (operands
[0], operands
[1]))
6772 /* Attempt to expand a unary operator. Make the expansion closer to the
6773 actual machine, then just general_operand, which will allow 2 separate
6774 memory references (one output, one input) in a single insn. */
6777 ix86_expand_unary_operator (code
, mode
, operands
)
6779 enum machine_mode mode
;
6782 int matching_memory
;
6783 rtx src
, dst
, op
, clob
;
6788 /* If the destination is memory, and we do not have matching source
6789 operands, do things in registers. */
6790 matching_memory
= 0;
6791 if (GET_CODE (dst
) == MEM
)
6793 if (rtx_equal_p (dst
, src
))
6794 matching_memory
= 1;
6796 dst
= gen_reg_rtx (mode
);
6799 /* When source operand is memory, destination must match. */
6800 if (!matching_memory
&& GET_CODE (src
) == MEM
)
6801 src
= force_reg (mode
, src
);
6803 /* If optimizing, copy to regs to improve CSE */
6804 if (optimize
&& ! no_new_pseudos
)
6806 if (GET_CODE (dst
) == MEM
)
6807 dst
= gen_reg_rtx (mode
);
6808 if (GET_CODE (src
) == MEM
)
6809 src
= force_reg (mode
, src
);
6812 /* Emit the instruction. */
6814 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
6815 if (reload_in_progress
|| code
== NOT
)
6817 /* Reload doesn't know about the flags register, and doesn't know that
6818 it doesn't want to clobber it. */
6825 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
6826 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
6829 /* Fix up the destination if needed. */
6830 if (dst
!= operands
[0])
6831 emit_move_insn (operands
[0], dst
);
6834 /* Return TRUE or FALSE depending on whether the unary operator meets the
6835 appropriate constraints. */
6838 ix86_unary_operator_ok (code
, mode
, operands
)
6839 enum rtx_code code ATTRIBUTE_UNUSED
;
6840 enum machine_mode mode ATTRIBUTE_UNUSED
;
6841 rtx operands
[2] ATTRIBUTE_UNUSED
;
6843 /* If one of operands is memory, source and destination must match. */
6844 if ((GET_CODE (operands
[0]) == MEM
6845 || GET_CODE (operands
[1]) == MEM
)
6846 && ! rtx_equal_p (operands
[0], operands
[1]))
6851 /* Return TRUE or FALSE depending on whether the first SET in INSN
6852 has source and destination with matching CC modes, and that the
6853 CC mode is at least as constrained as REQ_MODE. */
6856 ix86_match_ccmode (insn
, req_mode
)
6858 enum machine_mode req_mode
;
6861 enum machine_mode set_mode
;
6863 set
= PATTERN (insn
);
6864 if (GET_CODE (set
) == PARALLEL
)
6865 set
= XVECEXP (set
, 0, 0);
6866 if (GET_CODE (set
) != SET
)
6868 if (GET_CODE (SET_SRC (set
)) != COMPARE
)
6871 set_mode
= GET_MODE (SET_DEST (set
));
6875 if (req_mode
!= CCNOmode
6876 && (req_mode
!= CCmode
6877 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
6881 if (req_mode
== CCGCmode
)
6885 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
6889 if (req_mode
== CCZmode
)
6899 return (GET_MODE (SET_SRC (set
)) == set_mode
);
6902 /* Generate insn patterns to do an integer compare of OPERANDS. */
6905 ix86_expand_int_compare (code
, op0
, op1
)
6909 enum machine_mode cmpmode
;
6912 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
6913 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
6915 /* This is very simple, but making the interface the same as in the
6916 FP case makes the rest of the code easier. */
6917 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
6918 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
6920 /* Return the test that should be put into the flags user, i.e.
6921 the bcc, scc, or cmov instruction. */
6922 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
6925 /* Figure out whether to use ordered or unordered fp comparisons.
6926 Return the appropriate mode to use. */
6929 ix86_fp_compare_mode (code
)
6930 enum rtx_code code ATTRIBUTE_UNUSED
;
6932 /* ??? In order to make all comparisons reversible, we do all comparisons
6933 non-trapping when compiling for IEEE. Once gcc is able to distinguish
6934 all forms trapping and nontrapping comparisons, we can make inequality
6935 comparisons trapping again, since it results in better code when using
6936 FCOM based compares. */
6937 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
6941 ix86_cc_mode (code
, op0
, op1
)
6945 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
6946 return ix86_fp_compare_mode (code
);
6949 /* Only zero flag is needed. */
6951 case NE
: /* ZF!=0 */
6953 /* Codes needing carry flag. */
6954 case GEU
: /* CF=0 */
6955 case GTU
: /* CF=0 & ZF=0 */
6956 case LTU
: /* CF=1 */
6957 case LEU
: /* CF=1 | ZF=1 */
6959 /* Codes possibly doable only with sign flag when
6960 comparing against zero. */
6961 case GE
: /* SF=OF or SF=0 */
6962 case LT
: /* SF<>OF or SF=1 */
6963 if (op1
== const0_rtx
)
6966 /* For other cases Carry flag is not required. */
6968 /* Codes doable only with sign flag when comparing
6969 against zero, but we miss jump instruction for it
6970 so we need to use relational tests against overflow
6971 that thus needs to be zero. */
6972 case GT
: /* ZF=0 & SF=OF */
6973 case LE
: /* ZF=1 | SF<>OF */
6974 if (op1
== const0_rtx
)
6983 /* Return true if we should use an FCOMI instruction for this fp comparison. */
6986 ix86_use_fcomi_compare (code
)
6987 enum rtx_code code ATTRIBUTE_UNUSED
;
6989 enum rtx_code swapped_code
= swap_condition (code
);
6990 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
6991 || (ix86_fp_comparison_cost (swapped_code
)
6992 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
6995 /* Swap, force into registers, or otherwise massage the two operands
6996 to a fp comparison. The operands are updated in place; the new
6997 comparison code is returned. */
6999 static enum rtx_code
7000 ix86_prepare_fp_compare_args (code
, pop0
, pop1
)
7004 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
7005 rtx op0
= *pop0
, op1
= *pop1
;
7006 enum machine_mode op_mode
= GET_MODE (op0
);
7007 int is_sse
= SSE_REG_P (op0
) | SSE_REG_P (op1
);
7009 /* All of the unordered compare instructions only work on registers.
7010 The same is true of the XFmode compare instructions. The same is
7011 true of the fcomi compare instructions. */
7014 && (fpcmp_mode
== CCFPUmode
7015 || op_mode
== XFmode
7016 || op_mode
== TFmode
7017 || ix86_use_fcomi_compare (code
)))
7019 op0
= force_reg (op_mode
, op0
);
7020 op1
= force_reg (op_mode
, op1
);
7024 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7025 things around if they appear profitable, otherwise force op0
7028 if (standard_80387_constant_p (op0
) == 0
7029 || (GET_CODE (op0
) == MEM
7030 && ! (standard_80387_constant_p (op1
) == 0
7031 || GET_CODE (op1
) == MEM
)))
7034 tmp
= op0
, op0
= op1
, op1
= tmp
;
7035 code
= swap_condition (code
);
7038 if (GET_CODE (op0
) != REG
)
7039 op0
= force_reg (op_mode
, op0
);
7041 if (CONSTANT_P (op1
))
7043 if (standard_80387_constant_p (op1
))
7044 op1
= force_reg (op_mode
, op1
);
7046 op1
= validize_mem (force_const_mem (op_mode
, op1
));
7050 /* Try to rearrange the comparison to make it cheaper. */
7051 if (ix86_fp_comparison_cost (code
)
7052 > ix86_fp_comparison_cost (swap_condition (code
))
7053 && (GET_CODE (op0
) == REG
|| !reload_completed
))
7056 tmp
= op0
, op0
= op1
, op1
= tmp
;
7057 code
= swap_condition (code
);
7058 if (GET_CODE (op0
) != REG
)
7059 op0
= force_reg (op_mode
, op0
);
7067 /* Convert comparison codes we use to represent FP comparison to integer
7068 code that will result in proper branch. Return UNKNOWN if no such code
7070 static enum rtx_code
7071 ix86_fp_compare_code_to_integer (code
)
7101 /* Split comparison code CODE into comparisons we can do using branch
7102 instructions. BYPASS_CODE is comparison code for branch that will
7103 branch around FIRST_CODE and SECOND_CODE. If some of branches
7104 is not required, set value to NIL.
7105 We never require more than two branches. */
7107 ix86_fp_comparison_codes (code
, bypass_code
, first_code
, second_code
)
7108 enum rtx_code code
, *bypass_code
, *first_code
, *second_code
;
7114 /* The fcomi comparison sets flags as follows:
7124 case GT
: /* GTU - CF=0 & ZF=0 */
7125 case GE
: /* GEU - CF=0 */
7126 case ORDERED
: /* PF=0 */
7127 case UNORDERED
: /* PF=1 */
7128 case UNEQ
: /* EQ - ZF=1 */
7129 case UNLT
: /* LTU - CF=1 */
7130 case UNLE
: /* LEU - CF=1 | ZF=1 */
7131 case LTGT
: /* EQ - ZF=0 */
7133 case LT
: /* LTU - CF=1 - fails on unordered */
7135 *bypass_code
= UNORDERED
;
7137 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
7139 *bypass_code
= UNORDERED
;
7141 case EQ
: /* EQ - ZF=1 - fails on unordered */
7143 *bypass_code
= UNORDERED
;
7145 case NE
: /* NE - ZF=0 - fails on unordered */
7147 *second_code
= UNORDERED
;
7149 case UNGE
: /* GEU - CF=0 - fails on unordered */
7151 *second_code
= UNORDERED
;
7153 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
7155 *second_code
= UNORDERED
;
7160 if (!TARGET_IEEE_FP
)
7167 /* Return cost of comparison done fcom + arithmetics operations on AX.
7168 All following functions use the number of instructions as a cost metric.
7169 In future this should be tweaked to compute bytes for optimize_size and
7170 take into account performance of various instructions on various CPUs. */
7172 ix86_fp_comparison_arithmetics_cost (code
)
7175 if (!TARGET_IEEE_FP
)
7177 /* The cost of code output by ix86_expand_fp_compare. */
7205 /* Return cost of comparison done using fcomi operation.
7206 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7208 ix86_fp_comparison_fcomi_cost (code
)
7211 enum rtx_code bypass_code
, first_code
, second_code
;
7212 /* Return arbitrarily high cost when instruction is not supported - this
7213 prevents gcc from using it. */
7216 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
7217 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 2;
7220 /* Return cost of comparison done using sahf operation.
7221 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7223 ix86_fp_comparison_sahf_cost (code
)
7226 enum rtx_code bypass_code
, first_code
, second_code
;
7227 /* Return arbitrarily high cost when instruction is not preferred - this
7228 prevents gcc from using it. */
7229 if (!TARGET_USE_SAHF
&& !optimize_size
)
7231 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
7232 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 3;
7235 /* Compute cost of the comparison done using any method.
7236 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7238 ix86_fp_comparison_cost (code
)
7241 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
7244 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
7245 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
7247 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
7248 if (min
> sahf_cost
)
7250 if (min
> fcomi_cost
)
7255 /* Generate insn patterns to do a floating point compare of OPERANDS. */
7258 ix86_expand_fp_compare (code
, op0
, op1
, scratch
, second_test
, bypass_test
)
7260 rtx op0
, op1
, scratch
;
7264 enum machine_mode fpcmp_mode
, intcmp_mode
;
7266 int cost
= ix86_fp_comparison_cost (code
);
7267 enum rtx_code bypass_code
, first_code
, second_code
;
7269 fpcmp_mode
= ix86_fp_compare_mode (code
);
7270 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
7273 *second_test
= NULL_RTX
;
7275 *bypass_test
= NULL_RTX
;
7277 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
7279 /* Do fcomi/sahf based test when profitable. */
7280 if ((bypass_code
== NIL
|| bypass_test
)
7281 && (second_code
== NIL
|| second_test
)
7282 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
7286 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
7287 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
7293 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
7294 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), 9);
7296 scratch
= gen_reg_rtx (HImode
);
7297 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
7298 emit_insn (gen_x86_sahf_1 (scratch
));
7301 /* The FP codes work out to act like unsigned. */
7302 intcmp_mode
= fpcmp_mode
;
7304 if (bypass_code
!= NIL
)
7305 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
7306 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
7308 if (second_code
!= NIL
)
7309 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
7310 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
7315 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
7316 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
7317 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), 9);
7319 scratch
= gen_reg_rtx (HImode
);
7320 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
7322 /* In the unordered case, we have to check C2 for NaN's, which
7323 doesn't happen to work out to anything nice combination-wise.
7324 So do some bit twiddling on the value we've got in AH to come
7325 up with an appropriate set of condition codes. */
7327 intcmp_mode
= CCNOmode
;
7332 if (code
== GT
|| !TARGET_IEEE_FP
)
7334 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
7339 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7340 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
7341 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
7342 intcmp_mode
= CCmode
;
7348 if (code
== LT
&& TARGET_IEEE_FP
)
7350 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7351 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
7352 intcmp_mode
= CCmode
;
7357 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
7363 if (code
== GE
|| !TARGET_IEEE_FP
)
7365 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
7370 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7371 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
7378 if (code
== LE
&& TARGET_IEEE_FP
)
7380 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7381 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
7382 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
7383 intcmp_mode
= CCmode
;
7388 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
7394 if (code
== EQ
&& TARGET_IEEE_FP
)
7396 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7397 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
7398 intcmp_mode
= CCmode
;
7403 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
7410 if (code
== NE
&& TARGET_IEEE_FP
)
7412 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7413 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
7419 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
7425 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
7429 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
7438 /* Return the test that should be put into the flags user, i.e.
7439 the bcc, scc, or cmov instruction. */
7440 return gen_rtx_fmt_ee (code
, VOIDmode
,
7441 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
7446 ix86_expand_compare (code
, second_test
, bypass_test
)
7448 rtx
*second_test
, *bypass_test
;
7451 op0
= ix86_compare_op0
;
7452 op1
= ix86_compare_op1
;
7455 *second_test
= NULL_RTX
;
7457 *bypass_test
= NULL_RTX
;
7459 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
7460 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
7461 second_test
, bypass_test
);
7463 ret
= ix86_expand_int_compare (code
, op0
, op1
);
7468 /* Return true if the CODE will result in nontrivial jump sequence. */
7470 ix86_fp_jump_nontrivial_p (code
)
7473 enum rtx_code bypass_code
, first_code
, second_code
;
7476 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
7477 return bypass_code
!= NIL
|| second_code
!= NIL
;
7481 ix86_expand_branch (code
, label
)
7487 switch (GET_MODE (ix86_compare_op0
))
7493 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
7494 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
7495 gen_rtx_LABEL_REF (VOIDmode
, label
),
7497 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
7507 enum rtx_code bypass_code
, first_code
, second_code
;
7509 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
7512 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
7514 /* Check whether we will use the natural sequence with one jump. If
7515 so, we can expand jump early. Otherwise delay expansion by
7516 creating compound insn to not confuse optimizers. */
7517 if (bypass_code
== NIL
&& second_code
== NIL
7520 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
7521 gen_rtx_LABEL_REF (VOIDmode
, label
),
7526 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
7527 ix86_compare_op0
, ix86_compare_op1
);
7528 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
7529 gen_rtx_LABEL_REF (VOIDmode
, label
),
7531 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
7533 use_fcomi
= ix86_use_fcomi_compare (code
);
7534 vec
= rtvec_alloc (3 + !use_fcomi
);
7535 RTVEC_ELT (vec
, 0) = tmp
;
7537 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
7539 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
7542 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
7544 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
7552 /* Expand DImode branch into multiple compare+branch. */
7554 rtx lo
[2], hi
[2], label2
;
7555 enum rtx_code code1
, code2
, code3
;
7557 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
7559 tmp
= ix86_compare_op0
;
7560 ix86_compare_op0
= ix86_compare_op1
;
7561 ix86_compare_op1
= tmp
;
7562 code
= swap_condition (code
);
7564 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
7565 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
7567 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7568 avoid two branches. This costs one extra insn, so disable when
7569 optimizing for size. */
7571 if ((code
== EQ
|| code
== NE
)
7573 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
7578 if (hi
[1] != const0_rtx
)
7579 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
7580 NULL_RTX
, 0, OPTAB_WIDEN
);
7583 if (lo
[1] != const0_rtx
)
7584 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
7585 NULL_RTX
, 0, OPTAB_WIDEN
);
7587 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
7588 NULL_RTX
, 0, OPTAB_WIDEN
);
7590 ix86_compare_op0
= tmp
;
7591 ix86_compare_op1
= const0_rtx
;
7592 ix86_expand_branch (code
, label
);
7596 /* Otherwise, if we are doing less-than or greater-or-equal-than,
7597 op1 is a constant and the low word is zero, then we can just
7598 examine the high word. */
7600 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
7603 case LT
: case LTU
: case GE
: case GEU
:
7604 ix86_compare_op0
= hi
[0];
7605 ix86_compare_op1
= hi
[1];
7606 ix86_expand_branch (code
, label
);
7612 /* Otherwise, we need two or three jumps. */
7614 label2
= gen_label_rtx ();
7617 code2
= swap_condition (code
);
7618 code3
= unsigned_condition (code
);
7622 case LT
: case GT
: case LTU
: case GTU
:
7625 case LE
: code1
= LT
; code2
= GT
; break;
7626 case GE
: code1
= GT
; code2
= LT
; break;
7627 case LEU
: code1
= LTU
; code2
= GTU
; break;
7628 case GEU
: code1
= GTU
; code2
= LTU
; break;
7630 case EQ
: code1
= NIL
; code2
= NE
; break;
7631 case NE
: code2
= NIL
; break;
7639 * if (hi(a) < hi(b)) goto true;
7640 * if (hi(a) > hi(b)) goto false;
7641 * if (lo(a) < lo(b)) goto true;
7645 ix86_compare_op0
= hi
[0];
7646 ix86_compare_op1
= hi
[1];
7649 ix86_expand_branch (code1
, label
);
7651 ix86_expand_branch (code2
, label2
);
7653 ix86_compare_op0
= lo
[0];
7654 ix86_compare_op1
= lo
[1];
7655 ix86_expand_branch (code3
, label
);
7658 emit_label (label2
);
7667 /* Split branch based on floating point condition. */
7669 ix86_split_fp_branch (code
, op1
, op2
, target1
, target2
, tmp
)
7671 rtx op1
, op2
, target1
, target2
, tmp
;
7674 rtx label
= NULL_RTX
;
7676 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
7679 if (target2
!= pc_rtx
)
7682 code
= reverse_condition_maybe_unordered (code
);
7687 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
7688 tmp
, &second
, &bypass
);
7690 if (split_branch_probability
>= 0)
7692 /* Distribute the probabilities across the jumps.
7693 Assume the BYPASS and SECOND to be always test
7695 probability
= split_branch_probability
;
7697 /* Value of 1 is low enough that the probability does not need
7698 to be updated. Later we may run some experiments and see
7699 if unordered values are more frequent in practice. */
7701 bypass_probability
= 1;
7703 second_probability
= 1;
7705 if (bypass
!= NULL_RTX
)
7707 label
= gen_label_rtx ();
7708 i
= emit_jump_insn (gen_rtx_SET
7710 gen_rtx_IF_THEN_ELSE (VOIDmode
,
7712 gen_rtx_LABEL_REF (VOIDmode
,
7715 if (bypass_probability
>= 0)
7717 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
7718 GEN_INT (bypass_probability
),
7721 i
= emit_jump_insn (gen_rtx_SET
7723 gen_rtx_IF_THEN_ELSE (VOIDmode
,
7724 condition
, target1
, target2
)));
7725 if (probability
>= 0)
7727 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
7728 GEN_INT (probability
),
7730 if (second
!= NULL_RTX
)
7732 i
= emit_jump_insn (gen_rtx_SET
7734 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
7736 if (second_probability
>= 0)
7738 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
7739 GEN_INT (second_probability
),
7742 if (label
!= NULL_RTX
)
7747 ix86_expand_setcc (code
, dest
)
7751 rtx ret
, tmp
, tmpreg
;
7752 rtx second_test
, bypass_test
;
7755 if (GET_MODE (ix86_compare_op0
) == DImode
7757 return 0; /* FAIL */
7759 /* Three modes of generation:
7760 0 -- destination does not overlap compare sources:
7761 clear dest first, emit strict_low_part setcc.
7762 1 -- destination does overlap compare sources:
7763 emit subreg setcc, zero extend.
7764 2 -- destination is in QImode:
7767 We don't use mode 0 early in compilation because it confuses CSE.
7768 There are peepholes to turn mode 1 into mode 0 if things work out
7769 nicely after reload. */
7771 type
= cse_not_expected
? 0 : 1;
7773 if (GET_MODE (dest
) == QImode
)
7775 else if (reg_overlap_mentioned_p (dest
, ix86_compare_op0
)
7776 || reg_overlap_mentioned_p (dest
, ix86_compare_op1
))
7780 emit_move_insn (dest
, const0_rtx
);
7782 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
7783 PUT_MODE (ret
, QImode
);
7789 tmp
= gen_lowpart (QImode
, dest
);
7791 tmp
= gen_rtx_STRICT_LOW_PART (VOIDmode
, tmp
);
7795 if (!cse_not_expected
)
7796 tmp
= gen_reg_rtx (QImode
);
7798 tmp
= gen_lowpart (QImode
, dest
);
7802 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
7803 if (bypass_test
|| second_test
)
7805 rtx test
= second_test
;
7807 rtx tmp2
= gen_reg_rtx (QImode
);
7814 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
7816 PUT_MODE (test
, QImode
);
7817 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
7820 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
7822 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
7829 tmp
= gen_rtx_ZERO_EXTEND (GET_MODE (dest
), tmp
);
7830 tmp
= gen_rtx_SET (VOIDmode
, dest
, tmp
);
7831 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
7832 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
7836 return 1; /* DONE */
7840 ix86_expand_int_movcc (operands
)
7843 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
7844 rtx compare_seq
, compare_op
;
7845 rtx second_test
, bypass_test
;
7847 /* When the compare code is not LTU or GEU, we can not use sbbl case.
7848 In case comparsion is done with immediate, we can convert it to LTU or
7849 GEU by altering the integer. */
7851 if ((code
== LEU
|| code
== GTU
)
7852 && GET_CODE (ix86_compare_op1
) == CONST_INT
7853 && GET_MODE (operands
[0]) != HImode
7854 && (unsigned int)INTVAL (ix86_compare_op1
) != 0xffffffff
7855 && GET_CODE (operands
[2]) == CONST_INT
7856 && GET_CODE (operands
[3]) == CONST_INT
)
7862 ix86_compare_op1
= GEN_INT (INTVAL (ix86_compare_op1
) + 1);
7866 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
7867 compare_seq
= gen_sequence ();
7870 compare_code
= GET_CODE (compare_op
);
7872 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
7873 HImode insns, we'd be swallowed in word prefix ops. */
7875 if (GET_MODE (operands
[0]) != HImode
7876 && (GET_MODE (operands
[0]) != DImode
|| TARGET_64BIT
)
7877 && GET_CODE (operands
[2]) == CONST_INT
7878 && GET_CODE (operands
[3]) == CONST_INT
)
7880 rtx out
= operands
[0];
7881 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
7882 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
7885 if ((compare_code
== LTU
|| compare_code
== GEU
)
7886 && !second_test
&& !bypass_test
)
7889 /* Detect overlap between destination and compare sources. */
7892 /* To simplify rest of code, restrict to the GEU case. */
7893 if (compare_code
== LTU
)
7898 compare_code
= reverse_condition (compare_code
);
7899 code
= reverse_condition (code
);
7903 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
7904 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
7905 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
7907 emit_insn (compare_seq
);
7908 if (GET_MODE (tmp
) == DImode
)
7909 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
));
7911 emit_insn (gen_x86_movsicc_0_m1 (tmp
));
7924 if (GET_MODE (tmp
) == DImode
)
7925 emit_insn (gen_adddi3 (tmp
, tmp
, GEN_INT (ct
)));
7927 emit_insn (gen_addsi3 (tmp
, tmp
, GEN_INT (ct
)));
7939 if (GET_MODE (tmp
) == DImode
)
7940 emit_insn (gen_iordi3 (tmp
, tmp
, GEN_INT (ct
)));
7942 emit_insn (gen_iorsi3 (tmp
, tmp
, GEN_INT (ct
)));
7944 else if (diff
== -1 && ct
)
7954 if (GET_MODE (tmp
) == DImode
)
7956 emit_insn (gen_one_cmpldi2 (tmp
, tmp
));
7958 emit_insn (gen_adddi3 (tmp
, tmp
, GEN_INT (cf
)));
7962 emit_insn (gen_one_cmplsi2 (tmp
, tmp
));
7964 emit_insn (gen_addsi3 (tmp
, tmp
, GEN_INT (cf
)));
7972 * andl cf - ct, dest
7977 if (GET_MODE (tmp
) == DImode
)
7979 emit_insn (gen_anddi3 (tmp
, tmp
, GEN_INT (trunc_int_for_mode
7980 (cf
- ct
, DImode
))));
7982 emit_insn (gen_adddi3 (tmp
, tmp
, GEN_INT (ct
)));
7986 emit_insn (gen_andsi3 (tmp
, tmp
, GEN_INT (trunc_int_for_mode
7987 (cf
- ct
, SImode
))));
7989 emit_insn (gen_addsi3 (tmp
, tmp
, GEN_INT (ct
)));
7994 emit_move_insn (out
, tmp
);
7996 return 1; /* DONE */
8003 tmp
= ct
, ct
= cf
, cf
= tmp
;
8005 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
8007 /* We may be reversing unordered compare to normal compare, that
8008 is not valid in general (we may convert non-trapping condition
8009 to trapping one), however on i386 we currently emit all
8010 comparisons unordered. */
8011 compare_code
= reverse_condition_maybe_unordered (compare_code
);
8012 code
= reverse_condition_maybe_unordered (code
);
8016 compare_code
= reverse_condition (compare_code
);
8017 code
= reverse_condition (code
);
8020 if (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
8021 || diff
== 3 || diff
== 5 || diff
== 9)
8027 * lea cf(dest*(ct-cf)),dest
8031 * This also catches the degenerate setcc-only case.
8037 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
8038 ix86_compare_op1
, VOIDmode
, 0, 1);
8041 /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics
8042 done in proper mode to match. */
8049 tmp
= gen_rtx_MULT (GET_MODE (out
), out1
, GEN_INT (diff
& ~1));
8053 tmp
= gen_rtx_PLUS (GET_MODE (out
), tmp
, out1
);
8059 tmp
= gen_rtx_PLUS (GET_MODE (out
), tmp
, GEN_INT (cf
));
8063 && (GET_CODE (tmp
) != SUBREG
|| SUBREG_REG (tmp
) != out
))
8069 clob
= gen_rtx_REG (CCmode
, FLAGS_REG
);
8070 clob
= gen_rtx_CLOBBER (VOIDmode
, clob
);
8072 tmp
= gen_rtx_SET (VOIDmode
, out
, tmp
);
8073 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
8077 emit_insn (gen_rtx_SET (VOIDmode
, out
, tmp
));
8079 if (out
!= operands
[0])
8080 emit_move_insn (operands
[0], out
);
8082 return 1; /* DONE */
8086 * General case: Jumpful:
8087 * xorl dest,dest cmpl op1, op2
8088 * cmpl op1, op2 movl ct, dest
8090 * decl dest movl cf, dest
8091 * andl (cf-ct),dest 1:
8096 * This is reasonably steep, but branch mispredict costs are
8097 * high on modern cpus, so consider failing only if optimizing
8100 * %%% Parameterize branch_cost on the tuning architecture, then
8101 * use that. The 80386 couldn't care less about mispredicts.
8104 if (!optimize_size
&& !TARGET_CMOVE
)
8110 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
8112 /* We may be reversing unordered compare to normal compare,
8113 that is not valid in general (we may convert non-trapping
8114 condition to trapping one), however on i386 we currently
8115 emit all comparisons unordered. */
8116 compare_code
= reverse_condition_maybe_unordered (compare_code
);
8117 code
= reverse_condition_maybe_unordered (code
);
8121 compare_code
= reverse_condition (compare_code
);
8122 code
= reverse_condition (code
);
8126 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
8127 ix86_compare_op1
, VOIDmode
, 0, 1);
8129 emit_insn (gen_addsi3 (out
, out
, constm1_rtx
));
8130 emit_insn (gen_andsi3 (out
, out
, GEN_INT (trunc_int_for_mode
8131 (cf
- ct
, SImode
))));
8133 emit_insn (gen_addsi3 (out
, out
, GEN_INT (ct
)));
8134 if (out
!= operands
[0])
8135 emit_move_insn (operands
[0], out
);
8137 return 1; /* DONE */
8143 /* Try a few things more with specific constants and a variable. */
8146 rtx var
, orig_out
, out
, tmp
;
8149 return 0; /* FAIL */
8151 /* If one of the two operands is an interesting constant, load a
8152 constant with the above and mask it in with a logical operation. */
8154 if (GET_CODE (operands
[2]) == CONST_INT
)
8157 if (INTVAL (operands
[2]) == 0)
8158 operands
[3] = constm1_rtx
, op
= and_optab
;
8159 else if (INTVAL (operands
[2]) == -1)
8160 operands
[3] = const0_rtx
, op
= ior_optab
;
8162 return 0; /* FAIL */
8164 else if (GET_CODE (operands
[3]) == CONST_INT
)
8167 if (INTVAL (operands
[3]) == 0)
8168 operands
[2] = constm1_rtx
, op
= and_optab
;
8169 else if (INTVAL (operands
[3]) == -1)
8170 operands
[2] = const0_rtx
, op
= ior_optab
;
8172 return 0; /* FAIL */
8175 return 0; /* FAIL */
8177 orig_out
= operands
[0];
8178 tmp
= gen_reg_rtx (GET_MODE (orig_out
));
8181 /* Recurse to get the constant loaded. */
8182 if (ix86_expand_int_movcc (operands
) == 0)
8183 return 0; /* FAIL */
8185 /* Mask in the interesting variable. */
8186 out
= expand_binop (GET_MODE (orig_out
), op
, var
, tmp
, orig_out
, 0,
8188 if (out
!= orig_out
)
8189 emit_move_insn (orig_out
, out
);
8191 return 1; /* DONE */
8195 * For comparison with above,
8205 if (! nonimmediate_operand (operands
[2], GET_MODE (operands
[0])))
8206 operands
[2] = force_reg (GET_MODE (operands
[0]), operands
[2]);
8207 if (! nonimmediate_operand (operands
[3], GET_MODE (operands
[0])))
8208 operands
[3] = force_reg (GET_MODE (operands
[0]), operands
[3]);
8210 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
8212 rtx tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
8213 emit_move_insn (tmp
, operands
[3]);
8216 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
8218 rtx tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
8219 emit_move_insn (tmp
, operands
[2]);
8222 if (! register_operand (operands
[2], VOIDmode
)
8223 && ! register_operand (operands
[3], VOIDmode
))
8224 operands
[2] = force_reg (GET_MODE (operands
[0]), operands
[2]);
8226 emit_insn (compare_seq
);
8227 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8228 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
8229 compare_op
, operands
[2],
8232 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8233 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
8238 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8239 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
8244 return 1; /* DONE */
8248 ix86_expand_fp_movcc (operands
)
8253 rtx compare_op
, second_test
, bypass_test
;
8255 /* For SF/DFmode conditional moves based on comparisons
8256 in same mode, we may want to use SSE min/max instructions. */
8257 if (((TARGET_SSE
&& GET_MODE (operands
[0]) == SFmode
)
8258 || (TARGET_SSE2
&& GET_MODE (operands
[0]) == DFmode
))
8259 && GET_MODE (ix86_compare_op0
) == GET_MODE (operands
[0])
8260 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
8262 || (GET_CODE (operands
[1]) != LTGT
&& GET_CODE (operands
[1]) != UNEQ
))
8263 /* We may be called from the post-reload splitter. */
8264 && (!REG_P (operands
[0])
8265 || SSE_REG_P (operands
[0])
8266 || REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
))
8268 rtx op0
= ix86_compare_op0
, op1
= ix86_compare_op1
;
8269 code
= GET_CODE (operands
[1]);
8271 /* See if we have (cross) match between comparison operands and
8272 conditional move operands. */
8273 if (rtx_equal_p (operands
[2], op1
))
8278 code
= reverse_condition_maybe_unordered (code
);
8280 if (rtx_equal_p (operands
[2], op0
) && rtx_equal_p (operands
[3], op1
))
8282 /* Check for min operation. */
8285 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
8286 if (memory_operand (op0
, VOIDmode
))
8287 op0
= force_reg (GET_MODE (operands
[0]), op0
);
8288 if (GET_MODE (operands
[0]) == SFmode
)
8289 emit_insn (gen_minsf3 (operands
[0], op0
, op1
));
8291 emit_insn (gen_mindf3 (operands
[0], op0
, op1
));
8294 /* Check for max operation. */
8297 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
8298 if (memory_operand (op0
, VOIDmode
))
8299 op0
= force_reg (GET_MODE (operands
[0]), op0
);
8300 if (GET_MODE (operands
[0]) == SFmode
)
8301 emit_insn (gen_maxsf3 (operands
[0], op0
, op1
));
8303 emit_insn (gen_maxdf3 (operands
[0], op0
, op1
));
8307 /* Manage condition to be sse_comparison_operator. In case we are
8308 in non-ieee mode, try to canonicalize the destination operand
8309 to be first in the comparison - this helps reload to avoid extra
8311 if (!sse_comparison_operator (operands
[1], VOIDmode
)
8312 || (rtx_equal_p (operands
[0], ix86_compare_op1
) && !TARGET_IEEE_FP
))
8314 rtx tmp
= ix86_compare_op0
;
8315 ix86_compare_op0
= ix86_compare_op1
;
8316 ix86_compare_op1
= tmp
;
8317 operands
[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands
[1])),
8318 VOIDmode
, ix86_compare_op0
,
8321 /* Similary try to manage result to be first operand of conditional
8322 move. We also don't support the NE comparison on SSE, so try to
8324 if ((rtx_equal_p (operands
[0], operands
[3])
8325 && (!TARGET_IEEE_FP
|| GET_CODE (operands
[1]) != EQ
))
8326 || (GET_CODE (operands
[1]) == NE
&& TARGET_IEEE_FP
))
8328 rtx tmp
= operands
[2];
8329 operands
[2] = operands
[3];
8331 operands
[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8332 (GET_CODE (operands
[1])),
8333 VOIDmode
, ix86_compare_op0
,
8336 if (GET_MODE (operands
[0]) == SFmode
)
8337 emit_insn (gen_sse_movsfcc (operands
[0], operands
[1],
8338 operands
[2], operands
[3],
8339 ix86_compare_op0
, ix86_compare_op1
));
8341 emit_insn (gen_sse_movdfcc (operands
[0], operands
[1],
8342 operands
[2], operands
[3],
8343 ix86_compare_op0
, ix86_compare_op1
));
8347 /* The floating point conditional move instructions don't directly
8348 support conditions resulting from a signed integer comparison. */
8350 code
= GET_CODE (operands
[1]);
8351 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8353 /* The floating point conditional move instructions don't directly
8354 support signed integer comparisons. */
8356 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
8358 if (second_test
!= NULL
|| bypass_test
!= NULL
)
8360 tmp
= gen_reg_rtx (QImode
);
8361 ix86_expand_setcc (code
, tmp
);
8363 ix86_compare_op0
= tmp
;
8364 ix86_compare_op1
= const0_rtx
;
8365 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8367 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
8369 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
8370 emit_move_insn (tmp
, operands
[3]);
8373 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
8375 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
8376 emit_move_insn (tmp
, operands
[2]);
8380 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8381 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
8386 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8387 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
8392 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8393 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
8401 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
8402 works for floating pointer parameters and nonoffsetable memories.
8403 For pushes, it returns just stack offsets; the values will be saved
8404 in the right order. Maximally three parts are generated. */
8407 ix86_split_to_parts (operand
, parts
, mode
)
8410 enum machine_mode mode
;
8415 size
= mode
== TFmode
? 3 : (GET_MODE_SIZE (mode
) / 4);
8417 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
8419 if (GET_CODE (operand
) == REG
&& MMX_REGNO_P (REGNO (operand
)))
8421 if (size
< 2 || size
> 3)
8424 /* Optimize constant pool reference to immediates. This is used by fp moves,
8425 that force all constants to memory to allow combining. */
8427 if (GET_CODE (operand
) == MEM
8428 && GET_CODE (XEXP (operand
, 0)) == SYMBOL_REF
8429 && CONSTANT_POOL_ADDRESS_P (XEXP (operand
, 0)))
8430 operand
= get_pool_constant (XEXP (operand
, 0));
8432 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
8434 /* The only non-offsetable memories we handle are pushes. */
8435 if (! push_operand (operand
, VOIDmode
))
8438 operand
= copy_rtx (operand
);
8439 PUT_MODE (operand
, Pmode
);
8440 parts
[0] = parts
[1] = parts
[2] = operand
;
8442 else if (!TARGET_64BIT
)
8445 split_di (&operand
, 1, &parts
[0], &parts
[1]);
8448 if (REG_P (operand
))
8450 if (!reload_completed
)
8452 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
8453 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
8455 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
8457 else if (offsettable_memref_p (operand
))
8459 operand
= adjust_address (operand
, SImode
, 0);
8461 parts
[1] = adjust_address (operand
, SImode
, 4);
8463 parts
[2] = adjust_address (operand
, SImode
, 8);
8465 else if (GET_CODE (operand
) == CONST_DOUBLE
)
8470 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
8475 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
8476 parts
[2] = GEN_INT (trunc_int_for_mode (l
[2], SImode
));
8479 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
8484 parts
[1] = GEN_INT (trunc_int_for_mode (l
[1], SImode
));
8485 parts
[0] = GEN_INT (trunc_int_for_mode (l
[0], SImode
));
8493 if (mode
== XFmode
|| mode
== TFmode
)
8495 if (REG_P (operand
))
8497 if (!reload_completed
)
8499 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
8500 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
8502 else if (offsettable_memref_p (operand
))
8504 operand
= adjust_address (operand
, DImode
, 0);
8506 parts
[1] = adjust_address (operand
, SImode
, 8);
8508 else if (GET_CODE (operand
) == CONST_DOUBLE
)
8513 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
8514 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
8515 /* Do not use shift by 32 to avoid warning on 32bit systems. */
8516 if (HOST_BITS_PER_WIDE_INT
>= 64)
8518 = GEN_INT (trunc_int_for_mode (l
[0] + ((l
[1] << 31) << 1),
8521 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
8522 parts
[1] = GEN_INT (trunc_int_for_mode (l
[2], SImode
));
8532 /* Emit insns to perform a move or push of DI, DF, and XF values.
8533 Return false when normal moves are needed; true when all required
8534 insns have been emitted. Operands 2-4 contain the input values
8535 int the correct order; operands 5-7 contain the output values. */
8538 ix86_split_long_move (operands
)
8545 enum machine_mode mode
= GET_MODE (operands
[0]);
8547 /* The DFmode expanders may ask us to move double.
8548 For 64bit target this is single move. By hiding the fact
8549 here we simplify i386.md splitters. */
8550 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
8552 /* Optimize constant pool reference to immediates. This is used by fp moves,
8553 that force all constants to memory to allow combining. */
8555 if (GET_CODE (operands
[1]) == MEM
8556 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
8557 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
8558 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
8559 if (push_operand (operands
[0], VOIDmode
))
8561 operands
[0] = copy_rtx (operands
[0]);
8562 PUT_MODE (operands
[0], Pmode
);
8565 operands
[0] = gen_lowpart (DImode
, operands
[0]);
8566 operands
[1] = gen_lowpart (DImode
, operands
[1]);
8567 emit_move_insn (operands
[0], operands
[1]);
8571 /* The only non-offsettable memory we handle is push. */
8572 if (push_operand (operands
[0], VOIDmode
))
8574 else if (GET_CODE (operands
[0]) == MEM
8575 && ! offsettable_memref_p (operands
[0]))
8578 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
8579 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
8581 /* When emitting push, take care for source operands on the stack. */
8582 if (push
&& GET_CODE (operands
[1]) == MEM
8583 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
8586 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
8587 XEXP (part
[1][2], 0));
8588 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
8589 XEXP (part
[1][1], 0));
8592 /* We need to do copy in the right order in case an address register
8593 of the source overlaps the destination. */
8594 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
8596 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
8598 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
8601 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
8604 /* Collision in the middle part can be handled by reordering. */
8605 if (collisions
== 1 && nparts
== 3
8606 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
8609 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
8610 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
8613 /* If there are more collisions, we can't handle it by reordering.
8614 Do an lea to the last part and use only one colliding move. */
8615 else if (collisions
> 1)
8618 emit_insn (gen_rtx_SET (VOIDmode
, part
[0][nparts
- 1],
8619 XEXP (part
[1][0], 0)));
8620 part
[1][0] = change_address (part
[1][0],
8621 TARGET_64BIT
? DImode
: SImode
,
8622 part
[0][nparts
- 1]);
8623 part
[1][1] = adjust_address (part
[1][0], VOIDmode
, UNITS_PER_WORD
);
8625 part
[1][2] = adjust_address (part
[1][0], VOIDmode
, 8);
8635 /* We use only first 12 bytes of TFmode value, but for pushing we
8636 are required to adjust stack as if we were pushing real 16byte
8638 if (mode
== TFmode
&& !TARGET_64BIT
)
8639 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
8641 emit_move_insn (part
[0][2], part
[1][2]);
8646 /* In 64bit mode we don't have 32bit push available. In case this is
8647 register, it is OK - we will just use larger counterpart. We also
8648 retype memory - these comes from attempt to avoid REX prefix on
8649 moving of second half of TFmode value. */
8650 if (GET_MODE (part
[1][1]) == SImode
)
8652 if (GET_CODE (part
[1][1]) == MEM
)
8653 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
8654 else if (REG_P (part
[1][1]))
8655 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
8658 if (GET_MODE (part
[1][0]) == SImode
)
8659 part
[1][0] = part
[1][1];
8662 emit_move_insn (part
[0][1], part
[1][1]);
8663 emit_move_insn (part
[0][0], part
[1][0]);
8667 /* Choose correct order to not overwrite the source before it is copied. */
8668 if ((REG_P (part
[0][0])
8669 && REG_P (part
[1][1])
8670 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
8672 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
8674 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
8678 operands
[2] = part
[0][2];
8679 operands
[3] = part
[0][1];
8680 operands
[4] = part
[0][0];
8681 operands
[5] = part
[1][2];
8682 operands
[6] = part
[1][1];
8683 operands
[7] = part
[1][0];
8687 operands
[2] = part
[0][1];
8688 operands
[3] = part
[0][0];
8689 operands
[5] = part
[1][1];
8690 operands
[6] = part
[1][0];
8697 operands
[2] = part
[0][0];
8698 operands
[3] = part
[0][1];
8699 operands
[4] = part
[0][2];
8700 operands
[5] = part
[1][0];
8701 operands
[6] = part
[1][1];
8702 operands
[7] = part
[1][2];
8706 operands
[2] = part
[0][0];
8707 operands
[3] = part
[0][1];
8708 operands
[5] = part
[1][0];
8709 operands
[6] = part
[1][1];
8712 emit_move_insn (operands
[2], operands
[5]);
8713 emit_move_insn (operands
[3], operands
[6]);
8715 emit_move_insn (operands
[4], operands
[7]);
8721 ix86_split_ashldi (operands
, scratch
)
8722 rtx
*operands
, scratch
;
8724 rtx low
[2], high
[2];
8727 if (GET_CODE (operands
[2]) == CONST_INT
)
8729 split_di (operands
, 2, low
, high
);
8730 count
= INTVAL (operands
[2]) & 63;
8734 emit_move_insn (high
[0], low
[1]);
8735 emit_move_insn (low
[0], const0_rtx
);
8738 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
8742 if (!rtx_equal_p (operands
[0], operands
[1]))
8743 emit_move_insn (operands
[0], operands
[1]);
8744 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
8745 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
8750 if (!rtx_equal_p (operands
[0], operands
[1]))
8751 emit_move_insn (operands
[0], operands
[1]);
8753 split_di (operands
, 1, low
, high
);
8755 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
8756 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
8758 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
8760 if (! no_new_pseudos
)
8761 scratch
= force_reg (SImode
, const0_rtx
);
8763 emit_move_insn (scratch
, const0_rtx
);
8765 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
8769 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
8774 ix86_split_ashrdi (operands
, scratch
)
8775 rtx
*operands
, scratch
;
8777 rtx low
[2], high
[2];
8780 if (GET_CODE (operands
[2]) == CONST_INT
)
8782 split_di (operands
, 2, low
, high
);
8783 count
= INTVAL (operands
[2]) & 63;
8787 emit_move_insn (low
[0], high
[1]);
8789 if (! reload_completed
)
8790 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
8793 emit_move_insn (high
[0], low
[0]);
8794 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
8798 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
8802 if (!rtx_equal_p (operands
[0], operands
[1]))
8803 emit_move_insn (operands
[0], operands
[1]);
8804 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
8805 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
8810 if (!rtx_equal_p (operands
[0], operands
[1]))
8811 emit_move_insn (operands
[0], operands
[1]);
8813 split_di (operands
, 1, low
, high
);
8815 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
8816 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
8818 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
8820 if (! no_new_pseudos
)
8821 scratch
= gen_reg_rtx (SImode
);
8822 emit_move_insn (scratch
, high
[0]);
8823 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
8824 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
8828 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
8833 ix86_split_lshrdi (operands
, scratch
)
8834 rtx
*operands
, scratch
;
8836 rtx low
[2], high
[2];
8839 if (GET_CODE (operands
[2]) == CONST_INT
)
8841 split_di (operands
, 2, low
, high
);
8842 count
= INTVAL (operands
[2]) & 63;
8846 emit_move_insn (low
[0], high
[1]);
8847 emit_move_insn (high
[0], const0_rtx
);
8850 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
8854 if (!rtx_equal_p (operands
[0], operands
[1]))
8855 emit_move_insn (operands
[0], operands
[1]);
8856 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
8857 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
8862 if (!rtx_equal_p (operands
[0], operands
[1]))
8863 emit_move_insn (operands
[0], operands
[1]);
8865 split_di (operands
, 1, low
, high
);
8867 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
8868 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
8870 /* Heh. By reversing the arguments, we can reuse this pattern. */
8871 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
8873 if (! no_new_pseudos
)
8874 scratch
= force_reg (SImode
, const0_rtx
);
8876 emit_move_insn (scratch
, const0_rtx
);
8878 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
8882 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
8886 /* Helper function for the string operations below. Dest VARIABLE whether
8887 it is aligned to VALUE bytes. If true, jump to the label. */
8889 ix86_expand_aligntest (variable
, value
)
8893 rtx label
= gen_label_rtx ();
8894 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
8895 if (GET_MODE (variable
) == DImode
)
8896 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
8898 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
8899 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
8904 /* Adjust COUNTER by the VALUE. */
8906 ix86_adjust_counter (countreg
, value
)
8908 HOST_WIDE_INT value
;
8910 if (GET_MODE (countreg
) == DImode
)
8911 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
8913 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
8916 /* Zero extend possibly SImode EXP to Pmode register. */
8918 ix86_zero_extend_to_Pmode (exp
)
8922 if (GET_MODE (exp
) == VOIDmode
)
8923 return force_reg (Pmode
, exp
);
8924 if (GET_MODE (exp
) == Pmode
)
8925 return copy_to_mode_reg (Pmode
, exp
);
8926 r
= gen_reg_rtx (Pmode
);
8927 emit_insn (gen_zero_extendsidi2 (r
, exp
));
8931 /* Expand string move (memcpy) operation. Use i386 string operations when
8932 profitable. expand_clrstr contains similar code. */
8934 ix86_expand_movstr (dst
, src
, count_exp
, align_exp
)
8935 rtx dst
, src
, count_exp
, align_exp
;
8937 rtx srcreg
, destreg
, countreg
;
8938 enum machine_mode counter_mode
;
8939 HOST_WIDE_INT align
= 0;
8940 unsigned HOST_WIDE_INT count
= 0;
8945 if (GET_CODE (align_exp
) == CONST_INT
)
8946 align
= INTVAL (align_exp
);
8948 /* This simple hack avoids all inlining code and simplifies code bellow. */
8949 if (!TARGET_ALIGN_STRINGOPS
)
8952 if (GET_CODE (count_exp
) == CONST_INT
)
8953 count
= INTVAL (count_exp
);
8955 /* Figure out proper mode for counter. For 32bits it is always SImode,
8956 for 64bits use SImode when possible, otherwise DImode.
8957 Set count to number of bytes copied when known at compile time. */
8958 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
8959 || x86_64_zero_extended_value (count_exp
))
8960 counter_mode
= SImode
;
8962 counter_mode
= DImode
;
8964 if (counter_mode
!= SImode
&& counter_mode
!= DImode
)
8967 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
8968 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
8970 emit_insn (gen_cld ());
8972 /* When optimizing for size emit simple rep ; movsb instruction for
8973 counts not divisible by 4. */
8975 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
8977 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
8979 emit_insn (gen_rep_movqi_rex64 (destreg
, srcreg
, countreg
,
8980 destreg
, srcreg
, countreg
));
8982 emit_insn (gen_rep_movqi (destreg
, srcreg
, countreg
,
8983 destreg
, srcreg
, countreg
));
8986 /* For constant aligned (or small unaligned) copies use rep movsl
8987 followed by code copying the rest. For PentiumPro ensure 8 byte
8988 alignment to allow rep movsl acceleration. */
8992 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
8993 || optimize_size
|| count
< (unsigned int)64))
8995 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
8996 if (count
& ~(size
- 1))
8998 countreg
= copy_to_mode_reg (counter_mode
,
8999 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
9000 & (TARGET_64BIT
? -1 : 0x3fffffff)));
9001 countreg
= ix86_zero_extend_to_Pmode (countreg
);
9005 emit_insn (gen_rep_movsi_rex64 (destreg
, srcreg
, countreg
,
9006 destreg
, srcreg
, countreg
));
9008 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg
,
9009 destreg
, srcreg
, countreg
));
9012 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg
,
9013 destreg
, srcreg
, countreg
));
9015 if (size
== 8 && (count
& 0x04))
9016 emit_insn (gen_strmovsi (destreg
, srcreg
));
9018 emit_insn (gen_strmovhi (destreg
, srcreg
));
9020 emit_insn (gen_strmovqi (destreg
, srcreg
));
9022 /* The generic code based on the glibc implementation:
9023 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9024 allowing accelerated copying there)
9025 - copy the data using rep movsl
9032 /* In case we don't know anything about the alignment, default to
9033 library version, since it is usually equally fast and result in
9035 if (!TARGET_INLINE_ALL_STRINGOPS
&& align
< UNITS_PER_WORD
)
9041 if (TARGET_SINGLE_STRINGOP
)
9042 emit_insn (gen_cld ());
9044 countreg2
= gen_reg_rtx (Pmode
);
9045 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
9047 /* We don't use loops to align destination and to copy parts smaller
9048 than 4 bytes, because gcc is able to optimize such code better (in
9049 the case the destination or the count really is aligned, gcc is often
9050 able to predict the branches) and also it is friendlier to the
9051 hardware branch prediction.
9053 Using loops is benefical for generic case, because we can
9054 handle small counts using the loops. Many CPUs (such as Athlon)
9055 have large REP prefix setup costs.
9057 This is quite costy. Maybe we can revisit this decision later or
9058 add some customizability to this code. */
9061 && align
< (TARGET_PENTIUMPRO
&& (count
== 0
9062 || count
>= (unsigned int)260)
9063 ? 8 : UNITS_PER_WORD
))
9065 label
= gen_label_rtx ();
9066 emit_cmp_and_jump_insns (countreg
, GEN_INT (UNITS_PER_WORD
- 1),
9067 LEU
, 0, counter_mode
, 1, 0, label
);
9071 rtx label
= ix86_expand_aligntest (destreg
, 1);
9072 emit_insn (gen_strmovqi (destreg
, srcreg
));
9073 ix86_adjust_counter (countreg
, 1);
9075 LABEL_NUSES (label
) = 1;
9079 rtx label
= ix86_expand_aligntest (destreg
, 2);
9080 emit_insn (gen_strmovhi (destreg
, srcreg
));
9081 ix86_adjust_counter (countreg
, 2);
9083 LABEL_NUSES (label
) = 1;
9086 && ((TARGET_PENTIUMPRO
&& (count
== 0
9087 || count
>= (unsigned int)260))
9090 rtx label
= ix86_expand_aligntest (destreg
, 4);
9091 emit_insn (gen_strmovsi (destreg
, srcreg
));
9092 ix86_adjust_counter (countreg
, 4);
9094 LABEL_NUSES (label
) = 1;
9097 if (!TARGET_SINGLE_STRINGOP
)
9098 emit_insn (gen_cld ());
9101 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
9103 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg2
,
9104 destreg
, srcreg
, countreg2
));
9108 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
9109 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg2
,
9110 destreg
, srcreg
, countreg2
));
9116 LABEL_NUSES (label
) = 1;
9118 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
9119 emit_insn (gen_strmovsi (destreg
, srcreg
));
9120 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
9122 rtx label
= ix86_expand_aligntest (countreg
, 4);
9123 emit_insn (gen_strmovsi (destreg
, srcreg
));
9125 LABEL_NUSES (label
) = 1;
9127 if (align
> 2 && count
!= 0 && (count
& 2))
9128 emit_insn (gen_strmovhi (destreg
, srcreg
));
9129 if (align
<= 2 || count
== 0)
9131 rtx label
= ix86_expand_aligntest (countreg
, 2);
9132 emit_insn (gen_strmovhi (destreg
, srcreg
));
9134 LABEL_NUSES (label
) = 1;
9136 if (align
> 1 && count
!= 0 && (count
& 1))
9137 emit_insn (gen_strmovqi (destreg
, srcreg
));
9138 if (align
<= 1 || count
== 0)
9140 rtx label
= ix86_expand_aligntest (countreg
, 1);
9141 emit_insn (gen_strmovqi (destreg
, srcreg
));
9143 LABEL_NUSES (label
) = 1;
9147 insns
= get_insns ();
9150 ix86_set_move_mem_attrs (insns
, dst
, src
, destreg
, srcreg
);
9155 /* Expand string clear operation (bzero). Use i386 string operations when
9156 profitable. expand_movstr contains similar code. */
9158 ix86_expand_clrstr (src
, count_exp
, align_exp
)
9159 rtx src
, count_exp
, align_exp
;
9161 rtx destreg
, zeroreg
, countreg
;
9162 enum machine_mode counter_mode
;
9163 HOST_WIDE_INT align
= 0;
9164 unsigned HOST_WIDE_INT count
= 0;
9166 if (GET_CODE (align_exp
) == CONST_INT
)
9167 align
= INTVAL (align_exp
);
9169 /* This simple hack avoids all inlining code and simplifies code bellow. */
9170 if (!TARGET_ALIGN_STRINGOPS
)
9173 if (GET_CODE (count_exp
) == CONST_INT
)
9174 count
= INTVAL (count_exp
);
9175 /* Figure out proper mode for counter. For 32bits it is always SImode,
9176 for 64bits use SImode when possible, otherwise DImode.
9177 Set count to number of bytes copied when known at compile time. */
9178 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
9179 || x86_64_zero_extended_value (count_exp
))
9180 counter_mode
= SImode
;
9182 counter_mode
= DImode
;
9184 destreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
9186 emit_insn (gen_cld ());
9188 /* When optimizing for size emit simple rep ; movsb instruction for
9189 counts not divisible by 4. */
9191 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
9193 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
9194 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
9196 emit_insn (gen_rep_stosqi_rex64 (destreg
, countreg
, zeroreg
,
9197 destreg
, countreg
));
9199 emit_insn (gen_rep_stosqi (destreg
, countreg
, zeroreg
,
9200 destreg
, countreg
));
9204 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
9205 || optimize_size
|| count
< (unsigned int)64))
9207 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
9208 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
9209 if (count
& ~(size
- 1))
9211 countreg
= copy_to_mode_reg (counter_mode
,
9212 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
9213 & (TARGET_64BIT
? -1 : 0x3fffffff)));
9214 countreg
= ix86_zero_extend_to_Pmode (countreg
);
9218 emit_insn (gen_rep_stossi_rex64 (destreg
, countreg
, zeroreg
,
9219 destreg
, countreg
));
9221 emit_insn (gen_rep_stossi (destreg
, countreg
, zeroreg
,
9222 destreg
, countreg
));
9225 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg
, zeroreg
,
9226 destreg
, countreg
));
9228 if (size
== 8 && (count
& 0x04))
9229 emit_insn (gen_strsetsi (destreg
,
9230 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
9232 emit_insn (gen_strsethi (destreg
,
9233 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
9235 emit_insn (gen_strsetqi (destreg
,
9236 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
9243 /* In case we don't know anything about the alignment, default to
9244 library version, since it is usually equally fast and result in
9246 if (!TARGET_INLINE_ALL_STRINGOPS
&& align
< UNITS_PER_WORD
)
9249 if (TARGET_SINGLE_STRINGOP
)
9250 emit_insn (gen_cld ());
9252 countreg2
= gen_reg_rtx (Pmode
);
9253 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
9254 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
9257 && align
< (TARGET_PENTIUMPRO
&& (count
== 0
9258 || count
>= (unsigned int)260)
9259 ? 8 : UNITS_PER_WORD
))
9261 label
= gen_label_rtx ();
9262 emit_cmp_and_jump_insns (countreg
, GEN_INT (UNITS_PER_WORD
- 1),
9263 LEU
, 0, counter_mode
, 1, 0, label
);
9267 rtx label
= ix86_expand_aligntest (destreg
, 1);
9268 emit_insn (gen_strsetqi (destreg
,
9269 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
9270 ix86_adjust_counter (countreg
, 1);
9272 LABEL_NUSES (label
) = 1;
9276 rtx label
= ix86_expand_aligntest (destreg
, 2);
9277 emit_insn (gen_strsethi (destreg
,
9278 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
9279 ix86_adjust_counter (countreg
, 2);
9281 LABEL_NUSES (label
) = 1;
9283 if (align
<= 4 && TARGET_PENTIUMPRO
&& (count
== 0
9284 || count
>= (unsigned int)260))
9286 rtx label
= ix86_expand_aligntest (destreg
, 4);
9287 emit_insn (gen_strsetsi (destreg
, (TARGET_64BIT
9288 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
9290 ix86_adjust_counter (countreg
, 4);
9292 LABEL_NUSES (label
) = 1;
9295 if (!TARGET_SINGLE_STRINGOP
)
9296 emit_insn (gen_cld ());
9299 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
9301 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg2
, zeroreg
,
9302 destreg
, countreg2
));
9306 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
9307 emit_insn (gen_rep_stossi (destreg
, countreg2
, zeroreg
,
9308 destreg
, countreg2
));
9314 LABEL_NUSES (label
) = 1;
9316 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
9317 emit_insn (gen_strsetsi (destreg
,
9318 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
9319 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
9321 rtx label
= ix86_expand_aligntest (destreg
, 2);
9322 emit_insn (gen_strsetsi (destreg
,
9323 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
9325 LABEL_NUSES (label
) = 1;
9327 if (align
> 2 && count
!= 0 && (count
& 2))
9328 emit_insn (gen_strsethi (destreg
,
9329 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
9330 if (align
<= 2 || count
== 0)
9332 rtx label
= ix86_expand_aligntest (destreg
, 2);
9333 emit_insn (gen_strsethi (destreg
,
9334 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
9336 LABEL_NUSES (label
) = 1;
9338 if (align
> 1 && count
!= 0 && (count
& 1))
9339 emit_insn (gen_strsetqi (destreg
,
9340 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
9341 if (align
<= 1 || count
== 0)
9343 rtx label
= ix86_expand_aligntest (destreg
, 1);
9344 emit_insn (gen_strsetqi (destreg
,
9345 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
9347 LABEL_NUSES (label
) = 1;
9352 /* Expand strlen. */
9354 ix86_expand_strlen (out
, src
, eoschar
, align
)
9355 rtx out
, src
, eoschar
, align
;
9357 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
9359 /* The generic case of strlen expander is long. Avoid it's
9360 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
9362 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
9363 && !TARGET_INLINE_ALL_STRINGOPS
9365 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
9368 addr
= force_reg (Pmode
, XEXP (src
, 0));
9369 scratch1
= gen_reg_rtx (Pmode
);
9371 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
9374 /* Well it seems that some optimizer does not combine a call like
9375 foo(strlen(bar), strlen(bar));
9376 when the move and the subtraction is done here. It does calculate
9377 the length just once when these instructions are done inside of
9378 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
9379 often used and I use one fewer register for the lifetime of
9380 output_strlen_unroll() this is better. */
9382 emit_move_insn (out
, addr
);
9384 ix86_expand_strlensi_unroll_1 (out
, align
);
9386 /* strlensi_unroll_1 returns the address of the zero at the end of
9387 the string, like memchr(), so compute the length by subtracting
9388 the start address. */
9390 emit_insn (gen_subdi3 (out
, out
, addr
));
9392 emit_insn (gen_subsi3 (out
, out
, addr
));
9396 scratch2
= gen_reg_rtx (Pmode
);
9397 scratch3
= gen_reg_rtx (Pmode
);
9398 scratch4
= force_reg (Pmode
, constm1_rtx
);
9400 emit_move_insn (scratch3
, addr
);
9401 eoschar
= force_reg (QImode
, eoschar
);
9403 emit_insn (gen_cld ());
9406 emit_insn (gen_strlenqi_rex_1 (scratch1
, scratch3
, eoschar
,
9407 align
, scratch4
, scratch3
));
9408 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
9409 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
9413 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, eoschar
,
9414 align
, scratch4
, scratch3
));
9415 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
9416 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
9422 /* Expand the appropriate insns for doing strlen if not just doing
9425 out = result, initialized with the start address
9426 align_rtx = alignment of the address.
9427 scratch = scratch register, initialized with the startaddress when
9428 not aligned, otherwise undefined
9430 This is just the body. It needs the initialisations mentioned above and
9431 some address computing at the end. These things are done in i386.md. */
9434 ix86_expand_strlensi_unroll_1 (out
, align_rtx
)
9439 rtx align_2_label
= NULL_RTX
;
9440 rtx align_3_label
= NULL_RTX
;
9441 rtx align_4_label
= gen_label_rtx ();
9442 rtx end_0_label
= gen_label_rtx ();
9444 rtx tmpreg
= gen_reg_rtx (SImode
);
9445 rtx scratch
= gen_reg_rtx (SImode
);
9448 if (GET_CODE (align_rtx
) == CONST_INT
)
9449 align
= INTVAL (align_rtx
);
9451 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
9453 /* Is there a known alignment and is it less than 4? */
9456 rtx scratch1
= gen_reg_rtx (Pmode
);
9457 emit_move_insn (scratch1
, out
);
9458 /* Is there a known alignment and is it not 2? */
9461 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
9462 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
9464 /* Leave just the 3 lower bits. */
9465 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
9466 NULL_RTX
, 0, OPTAB_WIDEN
);
9468 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
9469 Pmode
, 1, 0, align_4_label
);
9470 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), EQ
, NULL
,
9471 Pmode
, 1, 0, align_2_label
);
9472 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), GTU
, NULL
,
9473 Pmode
, 1, 0, align_3_label
);
9477 /* Since the alignment is 2, we have to check 2 or 0 bytes;
9478 check if is aligned to 4 - byte. */
9480 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (2),
9481 NULL_RTX
, 0, OPTAB_WIDEN
);
9483 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
9484 Pmode
, 1, 0, align_4_label
);
9487 mem
= gen_rtx_MEM (QImode
, out
);
9489 /* Now compare the bytes. */
9491 /* Compare the first n unaligned byte on a byte per byte basis. */
9492 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
9493 QImode
, 1, 0, end_0_label
);
9495 /* Increment the address. */
9497 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
9499 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
9501 /* Not needed with an alignment of 2 */
9504 emit_label (align_2_label
);
9506 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
9507 QImode
, 1, 0, end_0_label
);
9510 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
9512 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
9514 emit_label (align_3_label
);
9517 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
9518 QImode
, 1, 0, end_0_label
);
9521 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
9523 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
9526 /* Generate loop to check 4 bytes at a time. It is not a good idea to
9527 align this loop. It gives only huge programs, but does not help to
9529 emit_label (align_4_label
);
9531 mem
= gen_rtx_MEM (SImode
, out
);
9532 emit_move_insn (scratch
, mem
);
9534 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
9536 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
9538 /* This formula yields a nonzero result iff one of the bytes is zero.
9539 This saves three branches inside loop and many cycles. */
9541 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
9542 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
9543 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
9544 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
9545 GEN_INT (trunc_int_for_mode
9546 (0x80808080, SImode
))));
9547 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0,
9548 SImode
, 1, 0, align_4_label
);
9552 rtx reg
= gen_reg_rtx (SImode
);
9553 rtx reg2
= gen_reg_rtx (Pmode
);
9554 emit_move_insn (reg
, tmpreg
);
9555 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
9557 /* If zero is not in the first two bytes, move two bytes forward. */
9558 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
9559 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
9560 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
9561 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
9562 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
9565 /* Emit lea manually to avoid clobbering of flags. */
9566 emit_insn (gen_rtx_SET (SImode
, reg2
,
9567 gen_rtx_PLUS (Pmode
, out
, GEN_INT (2))));
9569 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
9570 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
9571 emit_insn (gen_rtx_SET (VOIDmode
, out
,
9572 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
9579 rtx end_2_label
= gen_label_rtx ();
9580 /* Is zero in the first two bytes? */
9582 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
9583 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
9584 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
9585 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
9586 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
9588 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
9589 JUMP_LABEL (tmp
) = end_2_label
;
9591 /* Not in the first two. Move two bytes forward. */
9592 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
9594 emit_insn (gen_adddi3 (out
, out
, GEN_INT (2)));
9596 emit_insn (gen_addsi3 (out
, out
, GEN_INT (2)));
9598 emit_label (end_2_label
);
9602 /* Avoid branch in fixing the byte. */
9603 tmpreg
= gen_lowpart (QImode
, tmpreg
);
9604 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
9606 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3)));
9608 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3)));
9610 emit_label (end_0_label
);
9613 /* Clear stack slot assignments remembered from previous functions.
9614 This is called from INIT_EXPANDERS once before RTL is emitted for each
9618 ix86_init_machine_status (p
)
9621 p
->machine
= (struct machine_function
*)
9622 xcalloc (1, sizeof (struct machine_function
));
9625 /* Mark machine specific bits of P for GC. */
9627 ix86_mark_machine_status (p
)
9630 struct machine_function
*machine
= p
->machine
;
9631 enum machine_mode mode
;
9637 for (mode
= VOIDmode
; (int) mode
< (int) MAX_MACHINE_MODE
;
9638 mode
= (enum machine_mode
) ((int) mode
+ 1))
9639 for (n
= 0; n
< MAX_386_STACK_LOCALS
; n
++)
9640 ggc_mark_rtx (machine
->stack_locals
[(int) mode
][n
]);
9644 ix86_free_machine_status (p
)
9651 /* Return a MEM corresponding to a stack slot with mode MODE.
9652 Allocate a new slot if necessary.
9654 The RTL for a function can have several slots available: N is
9655 which slot to use. */
9658 assign_386_stack_local (mode
, n
)
9659 enum machine_mode mode
;
9662 if (n
< 0 || n
>= MAX_386_STACK_LOCALS
)
9665 if (ix86_stack_locals
[(int) mode
][n
] == NULL_RTX
)
9666 ix86_stack_locals
[(int) mode
][n
]
9667 = assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
9669 return ix86_stack_locals
[(int) mode
][n
];
9672 /* Calculate the length of the memory address in the instruction
9673 encoding. Does not include the one-byte modrm, opcode, or prefix. */
9676 memory_address_length (addr
)
9679 struct ix86_address parts
;
9680 rtx base
, index
, disp
;
9683 if (GET_CODE (addr
) == PRE_DEC
9684 || GET_CODE (addr
) == POST_INC
9685 || GET_CODE (addr
) == PRE_MODIFY
9686 || GET_CODE (addr
) == POST_MODIFY
)
9689 if (! ix86_decompose_address (addr
, &parts
))
9693 index
= parts
.index
;
9697 /* Register Indirect. */
9698 if (base
&& !index
&& !disp
)
9700 /* Special cases: ebp and esp need the two-byte modrm form. */
9701 if (addr
== stack_pointer_rtx
9702 || addr
== arg_pointer_rtx
9703 || addr
== frame_pointer_rtx
9704 || addr
== hard_frame_pointer_rtx
)
9708 /* Direct Addressing. */
9709 else if (disp
&& !base
&& !index
)
9714 /* Find the length of the displacement constant. */
9717 if (GET_CODE (disp
) == CONST_INT
9718 && CONST_OK_FOR_LETTER_P (INTVAL (disp
), 'K'))
9724 /* An index requires the two-byte modrm form. */
9732 /* Compute default value for "length_immediate" attribute. When SHORTFORM is set
9733 expect that insn have 8bit immediate alternative. */
9735 ix86_attr_length_immediate_default (insn
, shortform
)
9741 extract_insn_cached (insn
);
9742 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
9743 if (CONSTANT_P (recog_data
.operand
[i
]))
9748 && GET_CODE (recog_data
.operand
[i
]) == CONST_INT
9749 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data
.operand
[i
]), 'K'))
9753 switch (get_attr_mode (insn
))
9764 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
9769 fatal_insn ("Unknown insn mode", insn
);
9775 /* Compute default value for "length_address" attribute. */
9777 ix86_attr_length_address_default (insn
)
9781 extract_insn_cached (insn
);
9782 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
9783 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
9785 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
9791 /* Return the maximum number of instructions a cpu can issue. */
9798 case PROCESSOR_PENTIUM
:
9802 case PROCESSOR_PENTIUMPRO
:
9803 case PROCESSOR_PENTIUM4
:
9804 case PROCESSOR_ATHLON
:
9812 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
9813 by DEP_INSN and nothing set by DEP_INSN. */
9816 ix86_flags_dependant (insn
, dep_insn
, insn_type
)
9818 enum attr_type insn_type
;
9822 /* Simplify the test for uninteresting insns. */
9823 if (insn_type
!= TYPE_SETCC
9824 && insn_type
!= TYPE_ICMOV
9825 && insn_type
!= TYPE_FCMOV
9826 && insn_type
!= TYPE_IBR
)
9829 if ((set
= single_set (dep_insn
)) != 0)
9831 set
= SET_DEST (set
);
9834 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
9835 && XVECLEN (PATTERN (dep_insn
), 0) == 2
9836 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
9837 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
9839 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
9840 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
9845 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
9848 /* This test is true if the dependant insn reads the flags but
9849 not any other potentially set register. */
9850 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
9853 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
9859 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
9860 address with operands set by DEP_INSN. */
9863 ix86_agi_dependant (insn
, dep_insn
, insn_type
)
9865 enum attr_type insn_type
;
9869 if (insn_type
== TYPE_LEA
9872 addr
= PATTERN (insn
);
9873 if (GET_CODE (addr
) == SET
)
9875 else if (GET_CODE (addr
) == PARALLEL
9876 && GET_CODE (XVECEXP (addr
, 0, 0)) == SET
)
9877 addr
= XVECEXP (addr
, 0, 0);
9880 addr
= SET_SRC (addr
);
9885 extract_insn_cached (insn
);
9886 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
9887 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
9889 addr
= XEXP (recog_data
.operand
[i
], 0);
9896 return modified_in_p (addr
, dep_insn
);
9900 ix86_adjust_cost (insn
, link
, dep_insn
, cost
)
9901 rtx insn
, link
, dep_insn
;
9904 enum attr_type insn_type
, dep_insn_type
;
9905 enum attr_memory memory
, dep_memory
;
9907 int dep_insn_code_number
;
9909 /* Anti and output depenancies have zero cost on all CPUs. */
9910 if (REG_NOTE_KIND (link
) != 0)
9913 dep_insn_code_number
= recog_memoized (dep_insn
);
9915 /* If we can't recognize the insns, we can't really do anything. */
9916 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
9919 insn_type
= get_attr_type (insn
);
9920 dep_insn_type
= get_attr_type (dep_insn
);
9924 case PROCESSOR_PENTIUM
:
9925 /* Address Generation Interlock adds a cycle of latency. */
9926 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
9929 /* ??? Compares pair with jump/setcc. */
9930 if (ix86_flags_dependant (insn
, dep_insn
, insn_type
))
9933 /* Floating point stores require value to be ready one cycle ealier. */
9934 if (insn_type
== TYPE_FMOV
9935 && get_attr_memory (insn
) == MEMORY_STORE
9936 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
9940 case PROCESSOR_PENTIUMPRO
:
9941 memory
= get_attr_memory (insn
);
9942 dep_memory
= get_attr_memory (dep_insn
);
9944 /* Since we can't represent delayed latencies of load+operation,
9945 increase the cost here for non-imov insns. */
9946 if (dep_insn_type
!= TYPE_IMOV
9947 && dep_insn_type
!= TYPE_FMOV
9948 && (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
))
9951 /* INT->FP conversion is expensive. */
9952 if (get_attr_fp_int_src (dep_insn
))
9955 /* There is one cycle extra latency between an FP op and a store. */
9956 if (insn_type
== TYPE_FMOV
9957 && (set
= single_set (dep_insn
)) != NULL_RTX
9958 && (set2
= single_set (insn
)) != NULL_RTX
9959 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
9960 && GET_CODE (SET_DEST (set2
)) == MEM
)
9963 /* Show ability of reorder buffer to hide latency of load by executing
9964 in parallel with previous instruction in case
9965 previous instruction is not needed to compute the address. */
9966 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
9967 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
9969 /* Claim moves to take one cycle, as core can issue one load
9970 at time and the next load can start cycle later. */
9971 if (dep_insn_type
== TYPE_IMOV
9972 || dep_insn_type
== TYPE_FMOV
)
9980 memory
= get_attr_memory (insn
);
9981 dep_memory
= get_attr_memory (dep_insn
);
9982 /* The esp dependency is resolved before the instruction is really
9984 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
9985 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
9988 /* Since we can't represent delayed latencies of load+operation,
9989 increase the cost here for non-imov insns. */
9990 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
9991 cost
+= (dep_insn_type
!= TYPE_IMOV
) ? 2 : 1;
9993 /* INT->FP conversion is expensive. */
9994 if (get_attr_fp_int_src (dep_insn
))
9997 /* Show ability of reorder buffer to hide latency of load by executing
9998 in parallel with previous instruction in case
9999 previous instruction is not needed to compute the address. */
10000 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
10001 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
10003 /* Claim moves to take one cycle, as core can issue one load
10004 at time and the next load can start cycle later. */
10005 if (dep_insn_type
== TYPE_IMOV
10006 || dep_insn_type
== TYPE_FMOV
)
10015 case PROCESSOR_ATHLON
:
10016 memory
= get_attr_memory (insn
);
10017 dep_memory
= get_attr_memory (dep_insn
);
10019 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
10021 if (dep_insn_type
== TYPE_IMOV
|| dep_insn_type
== TYPE_FMOV
)
10026 /* Show ability of reorder buffer to hide latency of load by executing
10027 in parallel with previous instruction in case
10028 previous instruction is not needed to compute the address. */
10029 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
10030 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
10032 /* Claim moves to take one cycle, as core can issue one load
10033 at time and the next load can start cycle later. */
10034 if (dep_insn_type
== TYPE_IMOV
10035 || dep_insn_type
== TYPE_FMOV
)
10037 else if (cost
>= 3)
10052 struct ppro_sched_data
10055 int issued_this_cycle
;
10060 ix86_safe_length (insn
)
10063 if (recog_memoized (insn
) >= 0)
10064 return get_attr_length(insn
);
10070 ix86_safe_length_prefix (insn
)
10073 if (recog_memoized (insn
) >= 0)
10074 return get_attr_length(insn
);
10079 static enum attr_memory
10080 ix86_safe_memory (insn
)
10083 if (recog_memoized (insn
) >= 0)
10084 return get_attr_memory(insn
);
10086 return MEMORY_UNKNOWN
;
10089 static enum attr_pent_pair
10090 ix86_safe_pent_pair (insn
)
10093 if (recog_memoized (insn
) >= 0)
10094 return get_attr_pent_pair(insn
);
10096 return PENT_PAIR_NP
;
10099 static enum attr_ppro_uops
10100 ix86_safe_ppro_uops (insn
)
10103 if (recog_memoized (insn
) >= 0)
10104 return get_attr_ppro_uops (insn
);
10106 return PPRO_UOPS_MANY
;
10110 ix86_dump_ppro_packet (dump
)
10113 if (ix86_sched_data
.ppro
.decode
[0])
10115 fprintf (dump
, "PPRO packet: %d",
10116 INSN_UID (ix86_sched_data
.ppro
.decode
[0]));
10117 if (ix86_sched_data
.ppro
.decode
[1])
10118 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[1]));
10119 if (ix86_sched_data
.ppro
.decode
[2])
10120 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[2]));
10121 fputc ('\n', dump
);
10125 /* We're beginning a new block. Initialize data structures as necessary. */
10128 ix86_sched_init (dump
, sched_verbose
, veclen
)
10129 FILE *dump ATTRIBUTE_UNUSED
;
10130 int sched_verbose ATTRIBUTE_UNUSED
;
10131 int veclen ATTRIBUTE_UNUSED
;
10133 memset (&ix86_sched_data
, 0, sizeof (ix86_sched_data
));
10136 /* Shift INSN to SLOT, and shift everything else down. */
10139 ix86_reorder_insn (insnp
, slot
)
10146 insnp
[0] = insnp
[1];
10147 while (++insnp
!= slot
);
10152 /* Find an instruction with given pairability and minimal amount of cycles
10153 lost by the fact that the CPU waits for both pipelines to finish before
10154 reading next instructions. Also take care that both instructions together
10155 can not exceed 7 bytes. */
10158 ix86_pent_find_pair (e_ready
, ready
, type
, first
)
10161 enum attr_pent_pair type
;
10164 int mincycles
, cycles
;
10165 enum attr_pent_pair tmp
;
10166 enum attr_memory memory
;
10167 rtx
*insnp
, *bestinsnp
= NULL
;
10169 if (ix86_safe_length (first
) > 7 + ix86_safe_length_prefix (first
))
10172 memory
= ix86_safe_memory (first
);
10173 cycles
= result_ready_cost (first
);
10174 mincycles
= INT_MAX
;
10176 for (insnp
= e_ready
; insnp
>= ready
&& mincycles
; --insnp
)
10177 if ((tmp
= ix86_safe_pent_pair (*insnp
)) == type
10178 && ix86_safe_length (*insnp
) <= 7 + ix86_safe_length_prefix (*insnp
))
10180 enum attr_memory second_memory
;
10181 int secondcycles
, currentcycles
;
10183 second_memory
= ix86_safe_memory (*insnp
);
10184 secondcycles
= result_ready_cost (*insnp
);
10185 currentcycles
= abs (cycles
- secondcycles
);
10187 if (secondcycles
>= 1 && cycles
>= 1)
10189 /* Two read/modify/write instructions together takes two
10191 if (memory
== MEMORY_BOTH
&& second_memory
== MEMORY_BOTH
)
10192 currentcycles
+= 2;
10194 /* Read modify/write instruction followed by read/modify
10195 takes one cycle longer. */
10196 if (memory
== MEMORY_BOTH
&& second_memory
== MEMORY_LOAD
10197 && tmp
!= PENT_PAIR_UV
10198 && ix86_safe_pent_pair (first
) != PENT_PAIR_UV
)
10199 currentcycles
+= 1;
10201 if (currentcycles
< mincycles
)
10202 bestinsnp
= insnp
, mincycles
= currentcycles
;
10208 /* Subroutines of ix86_sched_reorder. */
10211 ix86_sched_reorder_pentium (ready
, e_ready
)
10215 enum attr_pent_pair pair1
, pair2
;
10218 /* This wouldn't be necessary if Haifa knew that static insn ordering
10219 is important to which pipe an insn is issued to. So we have to make
10220 some minor rearrangements. */
10222 pair1
= ix86_safe_pent_pair (*e_ready
);
10224 /* If the first insn is non-pairable, let it be. */
10225 if (pair1
== PENT_PAIR_NP
)
10228 pair2
= PENT_PAIR_NP
;
10231 /* If the first insn is UV or PV pairable, search for a PU
10232 insn to go with. */
10233 if (pair1
== PENT_PAIR_UV
|| pair1
== PENT_PAIR_PV
)
10235 insnp
= ix86_pent_find_pair (e_ready
-1, ready
,
10236 PENT_PAIR_PU
, *e_ready
);
10238 pair2
= PENT_PAIR_PU
;
10241 /* If the first insn is PU or UV pairable, search for a PV
10242 insn to go with. */
10243 if (pair2
== PENT_PAIR_NP
10244 && (pair1
== PENT_PAIR_PU
|| pair1
== PENT_PAIR_UV
))
10246 insnp
= ix86_pent_find_pair (e_ready
-1, ready
,
10247 PENT_PAIR_PV
, *e_ready
);
10249 pair2
= PENT_PAIR_PV
;
10252 /* If the first insn is pairable, search for a UV
10253 insn to go with. */
10254 if (pair2
== PENT_PAIR_NP
)
10256 insnp
= ix86_pent_find_pair (e_ready
-1, ready
,
10257 PENT_PAIR_UV
, *e_ready
);
10259 pair2
= PENT_PAIR_UV
;
10262 if (pair2
== PENT_PAIR_NP
)
10265 /* Found something! Decide if we need to swap the order. */
10266 if (pair1
== PENT_PAIR_PV
|| pair2
== PENT_PAIR_PU
10267 || (pair1
== PENT_PAIR_UV
&& pair2
== PENT_PAIR_UV
10268 && ix86_safe_memory (*e_ready
) == MEMORY_BOTH
10269 && ix86_safe_memory (*insnp
) == MEMORY_LOAD
))
10270 ix86_reorder_insn (insnp
, e_ready
);
10272 ix86_reorder_insn (insnp
, e_ready
- 1);
10276 ix86_sched_reorder_ppro (ready
, e_ready
)
10281 enum attr_ppro_uops cur_uops
;
10282 int issued_this_cycle
;
10286 /* At this point .ppro.decode contains the state of the three
10287 decoders from last "cycle". That is, those insns that were
10288 actually independent. But here we're scheduling for the
10289 decoder, and we may find things that are decodable in the
10292 memcpy (decode
, ix86_sched_data
.ppro
.decode
, sizeof (decode
));
10293 issued_this_cycle
= 0;
10296 cur_uops
= ix86_safe_ppro_uops (*insnp
);
10298 /* If the decoders are empty, and we've a complex insn at the
10299 head of the priority queue, let it issue without complaint. */
10300 if (decode
[0] == NULL
)
10302 if (cur_uops
== PPRO_UOPS_MANY
)
10304 decode
[0] = *insnp
;
10308 /* Otherwise, search for a 2-4 uop unsn to issue. */
10309 while (cur_uops
!= PPRO_UOPS_FEW
)
10311 if (insnp
== ready
)
10313 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
10316 /* If so, move it to the head of the line. */
10317 if (cur_uops
== PPRO_UOPS_FEW
)
10318 ix86_reorder_insn (insnp
, e_ready
);
10320 /* Issue the head of the queue. */
10321 issued_this_cycle
= 1;
10322 decode
[0] = *e_ready
--;
10325 /* Look for simple insns to fill in the other two slots. */
10326 for (i
= 1; i
< 3; ++i
)
10327 if (decode
[i
] == NULL
)
10329 if (ready
>= e_ready
)
10333 cur_uops
= ix86_safe_ppro_uops (*insnp
);
10334 while (cur_uops
!= PPRO_UOPS_ONE
)
10336 if (insnp
== ready
)
10338 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
10341 /* Found one. Move it to the head of the queue and issue it. */
10342 if (cur_uops
== PPRO_UOPS_ONE
)
10344 ix86_reorder_insn (insnp
, e_ready
);
10345 decode
[i
] = *e_ready
--;
10346 issued_this_cycle
++;
10350 /* ??? Didn't find one. Ideally, here we would do a lazy split
10351 of 2-uop insns, issue one and queue the other. */
10355 if (issued_this_cycle
== 0)
10356 issued_this_cycle
= 1;
10357 ix86_sched_data
.ppro
.issued_this_cycle
= issued_this_cycle
;
10360 /* We are about to being issuing insns for this clock cycle.
10361 Override the default sort algorithm to better slot instructions. */
10363 ix86_sched_reorder (dump
, sched_verbose
, ready
, n_readyp
, clock_var
)
10364 FILE *dump ATTRIBUTE_UNUSED
;
10365 int sched_verbose ATTRIBUTE_UNUSED
;
10368 int clock_var ATTRIBUTE_UNUSED
;
10370 int n_ready
= *n_readyp
;
10371 rtx
*e_ready
= ready
+ n_ready
- 1;
10381 case PROCESSOR_PENTIUM
:
10382 ix86_sched_reorder_pentium (ready
, e_ready
);
10385 case PROCESSOR_PENTIUMPRO
:
10386 ix86_sched_reorder_ppro (ready
, e_ready
);
10391 return ix86_issue_rate ();
10394 /* We are about to issue INSN. Return the number of insns left on the
10395 ready queue that can be issued this cycle. */
10398 ix86_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
)
10402 int can_issue_more
;
10408 return can_issue_more
- 1;
10410 case PROCESSOR_PENTIUMPRO
:
10412 enum attr_ppro_uops uops
= ix86_safe_ppro_uops (insn
);
10414 if (uops
== PPRO_UOPS_MANY
)
10417 ix86_dump_ppro_packet (dump
);
10418 ix86_sched_data
.ppro
.decode
[0] = insn
;
10419 ix86_sched_data
.ppro
.decode
[1] = NULL
;
10420 ix86_sched_data
.ppro
.decode
[2] = NULL
;
10422 ix86_dump_ppro_packet (dump
);
10423 ix86_sched_data
.ppro
.decode
[0] = NULL
;
10425 else if (uops
== PPRO_UOPS_FEW
)
10428 ix86_dump_ppro_packet (dump
);
10429 ix86_sched_data
.ppro
.decode
[0] = insn
;
10430 ix86_sched_data
.ppro
.decode
[1] = NULL
;
10431 ix86_sched_data
.ppro
.decode
[2] = NULL
;
10435 for (i
= 0; i
< 3; ++i
)
10436 if (ix86_sched_data
.ppro
.decode
[i
] == NULL
)
10438 ix86_sched_data
.ppro
.decode
[i
] = insn
;
10446 ix86_dump_ppro_packet (dump
);
10447 ix86_sched_data
.ppro
.decode
[0] = NULL
;
10448 ix86_sched_data
.ppro
.decode
[1] = NULL
;
10449 ix86_sched_data
.ppro
.decode
[2] = NULL
;
10453 return --ix86_sched_data
.ppro
.issued_this_cycle
;
10457 /* Walk through INSNS and look for MEM references whose address is DSTREG or
10458 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
10462 ix86_set_move_mem_attrs (insns
, dstref
, srcref
, dstreg
, srcreg
)
10464 rtx dstref
, srcref
, dstreg
, srcreg
;
10468 for (insn
= insns
; insn
!= 0 ; insn
= NEXT_INSN (insn
))
10470 ix86_set_move_mem_attrs_1 (PATTERN (insn
), dstref
, srcref
,
10474 /* Subroutine of above to actually do the updating by recursively walking
10478 ix86_set_move_mem_attrs_1 (x
, dstref
, srcref
, dstreg
, srcreg
)
10480 rtx dstref
, srcref
, dstreg
, srcreg
;
10482 enum rtx_code code
= GET_CODE (x
);
10483 const char *format_ptr
= GET_RTX_FORMAT (code
);
10486 if (code
== MEM
&& XEXP (x
, 0) == dstreg
)
10487 MEM_COPY_ATTRIBUTES (x
, dstref
);
10488 else if (code
== MEM
&& XEXP (x
, 0) == srcreg
)
10489 MEM_COPY_ATTRIBUTES (x
, srcref
);
10491 for (i
= 0; i
< GET_RTX_LENGTH (code
); i
++, format_ptr
++)
10493 if (*format_ptr
== 'e')
10494 ix86_set_move_mem_attrs_1 (XEXP (x
, i
), dstref
, srcref
,
10496 else if (*format_ptr
== 'E')
10497 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
10498 ix86_set_move_mem_attrs_1 (XVECEXP (x
, i
, j
), dstref
, srcref
,
10503 /* Compute the alignment given to a constant that is being placed in memory.
10504 EXP is the constant and ALIGN is the alignment that the object would
10506 The value of this function is used instead of that alignment to align
10510 ix86_constant_alignment (exp
, align
)
10514 if (TREE_CODE (exp
) == REAL_CST
)
10516 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
10518 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
10521 else if (TREE_CODE (exp
) == STRING_CST
&& TREE_STRING_LENGTH (exp
) >= 31
10528 /* Compute the alignment for a static variable.
10529 TYPE is the data type, and ALIGN is the alignment that
10530 the object would ordinarily have. The value of this function is used
10531 instead of that alignment to align the object. */
10534 ix86_data_alignment (type
, align
)
10538 if (AGGREGATE_TYPE_P (type
)
10539 && TYPE_SIZE (type
)
10540 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
10541 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 256
10542 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 256)
10545 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10546 to 16byte boundary. */
10549 if (AGGREGATE_TYPE_P (type
)
10550 && TYPE_SIZE (type
)
10551 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
10552 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
10553 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
10557 if (TREE_CODE (type
) == ARRAY_TYPE
)
10559 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
10561 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
10564 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
10567 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
10569 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
10572 else if ((TREE_CODE (type
) == RECORD_TYPE
10573 || TREE_CODE (type
) == UNION_TYPE
10574 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
10575 && TYPE_FIELDS (type
))
10577 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
10579 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
10582 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
10583 || TREE_CODE (type
) == INTEGER_TYPE
)
10585 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
10587 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
10594 /* Compute the alignment for a local variable.
10595 TYPE is the data type, and ALIGN is the alignment that
10596 the object would ordinarily have. The value of this macro is used
10597 instead of that alignment to align the object. */
10600 ix86_local_alignment (type
, align
)
10604 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10605 to 16byte boundary. */
10608 if (AGGREGATE_TYPE_P (type
)
10609 && TYPE_SIZE (type
)
10610 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
10611 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
10612 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
10615 if (TREE_CODE (type
) == ARRAY_TYPE
)
10617 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
10619 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
10622 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
10624 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
10626 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
10629 else if ((TREE_CODE (type
) == RECORD_TYPE
10630 || TREE_CODE (type
) == UNION_TYPE
10631 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
10632 && TYPE_FIELDS (type
))
10634 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
10636 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
10639 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
10640 || TREE_CODE (type
) == INTEGER_TYPE
)
10643 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
10645 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
10651 /* Emit RTL insns to initialize the variable parts of a trampoline.
10652 FNADDR is an RTX for the address of the function's pure code.
10653 CXT is an RTX for the static chain value for the function. */
10655 x86_initialize_trampoline (tramp
, fnaddr
, cxt
)
10656 rtx tramp
, fnaddr
, cxt
;
10660 /* Compute offset from the end of the jmp to the target function. */
10661 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
10662 plus_constant (tramp
, 10),
10663 NULL_RTX
, 1, OPTAB_DIRECT
);
10664 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
10665 GEN_INT (trunc_int_for_mode (0xb9, QImode
)));
10666 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
10667 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
10668 GEN_INT (trunc_int_for_mode (0xe9, QImode
)));
10669 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
10674 /* Try to load address using shorter movl instead of movabs.
10675 We may want to support movq for kernel mode, but kernel does not use
10676 trampolines at the moment. */
10677 if (x86_64_zero_extended_value (fnaddr
))
10679 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
10680 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
10681 GEN_INT (trunc_int_for_mode (0xbb41, HImode
)));
10682 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
10683 gen_lowpart (SImode
, fnaddr
));
10688 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
10689 GEN_INT (trunc_int_for_mode (0xbb49, HImode
)));
10690 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
10694 /* Load static chain using movabs to r10. */
10695 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
10696 GEN_INT (trunc_int_for_mode (0xba49, HImode
)));
10697 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
10700 /* Jump to the r11 */
10701 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
10702 GEN_INT (trunc_int_for_mode (0xff49, HImode
)));
10703 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
10704 GEN_INT (trunc_int_for_mode (0xe3, HImode
)));
10706 if (offset
> TRAMPOLINE_SIZE
)
10711 #define def_builtin(MASK, NAME, TYPE, CODE) \
10713 if ((MASK) & target_flags) \
10714 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
10717 struct builtin_description
10719 const unsigned int mask
;
10720 const enum insn_code icode
;
10721 const char *const name
;
10722 const enum ix86_builtins code
;
10723 const enum rtx_code comparison
;
10724 const unsigned int flag
;
10727 static const struct builtin_description bdesc_comi
[] =
10729 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, EQ
, 0 },
10730 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, LT
, 0 },
10731 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, LE
, 0 },
10732 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, LT
, 1 },
10733 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, LE
, 1 },
10734 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, NE
, 0 },
10735 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, EQ
, 0 },
10736 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, LT
, 0 },
10737 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, LE
, 0 },
10738 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, LT
, 1 },
10739 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, LE
, 1 },
10740 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, NE
, 0 }
10743 static const struct builtin_description bdesc_2arg
[] =
10746 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
10747 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
10748 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
10749 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
10750 { MASK_SSE
, CODE_FOR_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
10751 { MASK_SSE
, CODE_FOR_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
10752 { MASK_SSE
, CODE_FOR_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
10753 { MASK_SSE
, CODE_FOR_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
10755 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
10756 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
10757 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
10758 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, 1 },
10759 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, 1 },
10760 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
10761 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, EQ
, 0 },
10762 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, LT
, 0 },
10763 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, LE
, 0 },
10764 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, LT
, 1 },
10765 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, LE
, 1 },
10766 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, UNORDERED
, 0 },
10767 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
10768 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
10769 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
10770 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS
, LT
, 1 },
10771 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS
, LE
, 1 },
10772 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
10773 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, EQ
, 0 },
10774 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, LT
, 0 },
10775 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, LE
, 0 },
10776 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, LT
, 1 },
10777 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, LE
, 1 },
10778 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
10780 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
10781 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
10782 { MASK_SSE
, CODE_FOR_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
10783 { MASK_SSE
, CODE_FOR_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
10785 { MASK_SSE
, CODE_FOR_sse_andti3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
10786 { MASK_SSE
, CODE_FOR_sse_nandti3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
10787 { MASK_SSE
, CODE_FOR_sse_iorti3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
10788 { MASK_SSE
, CODE_FOR_sse_xorti3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
10790 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
10791 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
10792 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
10793 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
10794 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
10797 { MASK_MMX
, CODE_FOR_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
10798 { MASK_MMX
, CODE_FOR_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
10799 { MASK_MMX
, CODE_FOR_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
10800 { MASK_MMX
, CODE_FOR_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
10801 { MASK_MMX
, CODE_FOR_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
10802 { MASK_MMX
, CODE_FOR_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
10804 { MASK_MMX
, CODE_FOR_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
10805 { MASK_MMX
, CODE_FOR_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
10806 { MASK_MMX
, CODE_FOR_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
10807 { MASK_MMX
, CODE_FOR_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
10808 { MASK_MMX
, CODE_FOR_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
10809 { MASK_MMX
, CODE_FOR_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
10810 { MASK_MMX
, CODE_FOR_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
10811 { MASK_MMX
, CODE_FOR_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
10813 { MASK_MMX
, CODE_FOR_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
10814 { MASK_MMX
, CODE_FOR_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
10815 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
10817 { MASK_MMX
, CODE_FOR_mmx_anddi3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
10818 { MASK_MMX
, CODE_FOR_mmx_nanddi3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
10819 { MASK_MMX
, CODE_FOR_mmx_iordi3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
10820 { MASK_MMX
, CODE_FOR_mmx_xordi3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
10822 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
10823 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
10825 { MASK_MMX
, CODE_FOR_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
10826 { MASK_MMX
, CODE_FOR_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
10827 { MASK_MMX
, CODE_FOR_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
10828 { MASK_MMX
, CODE_FOR_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
10829 { MASK_MMX
, CODE_FOR_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
10830 { MASK_MMX
, CODE_FOR_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
10832 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
10833 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
10834 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
10835 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
10837 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
10838 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
10839 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
10840 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
10841 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
10842 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
10845 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
10846 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
10847 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
10849 { MASK_SSE
, CODE_FOR_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
10850 { MASK_SSE
, CODE_FOR_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
10852 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
10853 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
10854 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
10855 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
10856 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
10857 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
10859 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
10860 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
10861 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
10862 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
10863 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
10864 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
10866 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
10867 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
10868 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
10869 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
10871 { MASK_SSE
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
10872 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 }
10876 static const struct builtin_description bdesc_1arg
[] =
10878 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
10879 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
10881 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
10882 { MASK_SSE
, CODE_FOR_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
10883 { MASK_SSE
, CODE_FOR_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
10885 { MASK_SSE
, CODE_FOR_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
10886 { MASK_SSE
, CODE_FOR_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
10887 { MASK_SSE
, CODE_FOR_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
10888 { MASK_SSE
, CODE_FOR_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 }
10893 ix86_init_builtins ()
10896 ix86_init_mmx_sse_builtins ();
10899 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
10900 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
10903 ix86_init_mmx_sse_builtins ()
10905 const struct builtin_description
* d
;
10907 tree endlink
= void_list_node
;
10909 tree pchar_type_node
= build_pointer_type (char_type_node
);
10910 tree pfloat_type_node
= build_pointer_type (float_type_node
);
10911 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
10912 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
10915 tree int_ftype_v4sf_v4sf
10916 = build_function_type (integer_type_node
,
10917 tree_cons (NULL_TREE
, V4SF_type_node
,
10918 tree_cons (NULL_TREE
,
10921 tree v4si_ftype_v4sf_v4sf
10922 = build_function_type (V4SI_type_node
,
10923 tree_cons (NULL_TREE
, V4SF_type_node
,
10924 tree_cons (NULL_TREE
,
10927 /* MMX/SSE/integer conversions. */
10928 tree int_ftype_v4sf
10929 = build_function_type (integer_type_node
,
10930 tree_cons (NULL_TREE
, V4SF_type_node
,
10932 tree int_ftype_v8qi
10933 = build_function_type (integer_type_node
,
10934 tree_cons (NULL_TREE
, V8QI_type_node
,
10936 tree int_ftype_v2si
10937 = build_function_type (integer_type_node
,
10938 tree_cons (NULL_TREE
, V2SI_type_node
,
10940 tree v2si_ftype_int
10941 = build_function_type (V2SI_type_node
,
10942 tree_cons (NULL_TREE
, integer_type_node
,
10944 tree v4sf_ftype_v4sf_int
10945 = build_function_type (V4SF_type_node
,
10946 tree_cons (NULL_TREE
, V4SF_type_node
,
10947 tree_cons (NULL_TREE
, integer_type_node
,
10949 tree v4sf_ftype_v4sf_v2si
10950 = build_function_type (V4SF_type_node
,
10951 tree_cons (NULL_TREE
, V4SF_type_node
,
10952 tree_cons (NULL_TREE
, V2SI_type_node
,
10954 tree int_ftype_v4hi_int
10955 = build_function_type (integer_type_node
,
10956 tree_cons (NULL_TREE
, V4HI_type_node
,
10957 tree_cons (NULL_TREE
, integer_type_node
,
10959 tree v4hi_ftype_v4hi_int_int
10960 = build_function_type (V4HI_type_node
,
10961 tree_cons (NULL_TREE
, V4HI_type_node
,
10962 tree_cons (NULL_TREE
, integer_type_node
,
10963 tree_cons (NULL_TREE
,
10966 /* Miscellaneous. */
10967 tree v8qi_ftype_v4hi_v4hi
10968 = build_function_type (V8QI_type_node
,
10969 tree_cons (NULL_TREE
, V4HI_type_node
,
10970 tree_cons (NULL_TREE
, V4HI_type_node
,
10972 tree v4hi_ftype_v2si_v2si
10973 = build_function_type (V4HI_type_node
,
10974 tree_cons (NULL_TREE
, V2SI_type_node
,
10975 tree_cons (NULL_TREE
, V2SI_type_node
,
10977 tree v4sf_ftype_v4sf_v4sf_int
10978 = build_function_type (V4SF_type_node
,
10979 tree_cons (NULL_TREE
, V4SF_type_node
,
10980 tree_cons (NULL_TREE
, V4SF_type_node
,
10981 tree_cons (NULL_TREE
,
10984 tree v4hi_ftype_v8qi_v8qi
10985 = build_function_type (V4HI_type_node
,
10986 tree_cons (NULL_TREE
, V8QI_type_node
,
10987 tree_cons (NULL_TREE
, V8QI_type_node
,
10989 tree v2si_ftype_v4hi_v4hi
10990 = build_function_type (V2SI_type_node
,
10991 tree_cons (NULL_TREE
, V4HI_type_node
,
10992 tree_cons (NULL_TREE
, V4HI_type_node
,
10994 tree v4hi_ftype_v4hi_int
10995 = build_function_type (V4HI_type_node
,
10996 tree_cons (NULL_TREE
, V4HI_type_node
,
10997 tree_cons (NULL_TREE
, integer_type_node
,
10999 tree v4hi_ftype_v4hi_di
11000 = build_function_type (V4HI_type_node
,
11001 tree_cons (NULL_TREE
, V4HI_type_node
,
11002 tree_cons (NULL_TREE
,
11003 long_long_integer_type_node
,
11005 tree v2si_ftype_v2si_di
11006 = build_function_type (V2SI_type_node
,
11007 tree_cons (NULL_TREE
, V2SI_type_node
,
11008 tree_cons (NULL_TREE
,
11009 long_long_integer_type_node
,
11011 tree void_ftype_void
11012 = build_function_type (void_type_node
, endlink
);
11013 tree void_ftype_pchar_int
11014 = build_function_type (void_type_node
,
11015 tree_cons (NULL_TREE
, pchar_type_node
,
11016 tree_cons (NULL_TREE
, integer_type_node
,
11018 tree void_ftype_unsigned
11019 = build_function_type (void_type_node
,
11020 tree_cons (NULL_TREE
, unsigned_type_node
,
11022 tree unsigned_ftype_void
11023 = build_function_type (unsigned_type_node
, endlink
);
11025 = build_function_type (long_long_unsigned_type_node
, endlink
);
11027 = build_function_type (intTI_type_node
, endlink
);
11028 tree v2si_ftype_v4sf
11029 = build_function_type (V2SI_type_node
,
11030 tree_cons (NULL_TREE
, V4SF_type_node
,
11032 /* Loads/stores. */
11033 tree maskmovq_args
= tree_cons (NULL_TREE
, V8QI_type_node
,
11034 tree_cons (NULL_TREE
, V8QI_type_node
,
11035 tree_cons (NULL_TREE
,
11038 tree void_ftype_v8qi_v8qi_pchar
11039 = build_function_type (void_type_node
, maskmovq_args
);
11040 tree v4sf_ftype_pfloat
11041 = build_function_type (V4SF_type_node
,
11042 tree_cons (NULL_TREE
, pfloat_type_node
,
11044 tree v4sf_ftype_float
11045 = build_function_type (V4SF_type_node
,
11046 tree_cons (NULL_TREE
, float_type_node
,
11048 tree v4sf_ftype_float_float_float_float
11049 = build_function_type (V4SF_type_node
,
11050 tree_cons (NULL_TREE
, float_type_node
,
11051 tree_cons (NULL_TREE
, float_type_node
,
11052 tree_cons (NULL_TREE
,
11054 tree_cons (NULL_TREE
,
11057 /* @@@ the type is bogus */
11058 tree v4sf_ftype_v4sf_pv2si
11059 = build_function_type (V4SF_type_node
,
11060 tree_cons (NULL_TREE
, V4SF_type_node
,
11061 tree_cons (NULL_TREE
, pv2si_type_node
,
11063 tree void_ftype_pv2si_v4sf
11064 = build_function_type (void_type_node
,
11065 tree_cons (NULL_TREE
, pv2si_type_node
,
11066 tree_cons (NULL_TREE
, V4SF_type_node
,
11068 tree void_ftype_pfloat_v4sf
11069 = build_function_type (void_type_node
,
11070 tree_cons (NULL_TREE
, pfloat_type_node
,
11071 tree_cons (NULL_TREE
, V4SF_type_node
,
11073 tree void_ftype_pdi_di
11074 = build_function_type (void_type_node
,
11075 tree_cons (NULL_TREE
, pdi_type_node
,
11076 tree_cons (NULL_TREE
,
11077 long_long_unsigned_type_node
,
11079 /* Normal vector unops. */
11080 tree v4sf_ftype_v4sf
11081 = build_function_type (V4SF_type_node
,
11082 tree_cons (NULL_TREE
, V4SF_type_node
,
11085 /* Normal vector binops. */
11086 tree v4sf_ftype_v4sf_v4sf
11087 = build_function_type (V4SF_type_node
,
11088 tree_cons (NULL_TREE
, V4SF_type_node
,
11089 tree_cons (NULL_TREE
, V4SF_type_node
,
11091 tree v8qi_ftype_v8qi_v8qi
11092 = build_function_type (V8QI_type_node
,
11093 tree_cons (NULL_TREE
, V8QI_type_node
,
11094 tree_cons (NULL_TREE
, V8QI_type_node
,
11096 tree v4hi_ftype_v4hi_v4hi
11097 = build_function_type (V4HI_type_node
,
11098 tree_cons (NULL_TREE
, V4HI_type_node
,
11099 tree_cons (NULL_TREE
, V4HI_type_node
,
11101 tree v2si_ftype_v2si_v2si
11102 = build_function_type (V2SI_type_node
,
11103 tree_cons (NULL_TREE
, V2SI_type_node
,
11104 tree_cons (NULL_TREE
, V2SI_type_node
,
11106 tree ti_ftype_ti_ti
11107 = build_function_type (intTI_type_node
,
11108 tree_cons (NULL_TREE
, intTI_type_node
,
11109 tree_cons (NULL_TREE
, intTI_type_node
,
11111 tree di_ftype_di_di
11112 = build_function_type (long_long_unsigned_type_node
,
11113 tree_cons (NULL_TREE
, long_long_unsigned_type_node
,
11114 tree_cons (NULL_TREE
,
11115 long_long_unsigned_type_node
,
11118 tree v2si_ftype_v2sf
11119 = build_function_type (V2SI_type_node
,
11120 tree_cons (NULL_TREE
, V2SF_type_node
,
11122 tree v2sf_ftype_v2si
11123 = build_function_type (V2SF_type_node
,
11124 tree_cons (NULL_TREE
, V2SI_type_node
,
11126 tree v2si_ftype_v2si
11127 = build_function_type (V2SI_type_node
,
11128 tree_cons (NULL_TREE
, V2SI_type_node
,
11130 tree v2sf_ftype_v2sf
11131 = build_function_type (V2SF_type_node
,
11132 tree_cons (NULL_TREE
, V2SF_type_node
,
11134 tree v2sf_ftype_v2sf_v2sf
11135 = build_function_type (V2SF_type_node
,
11136 tree_cons (NULL_TREE
, V2SF_type_node
,
11137 tree_cons (NULL_TREE
,
11140 tree v2si_ftype_v2sf_v2sf
11141 = build_function_type (V2SI_type_node
,
11142 tree_cons (NULL_TREE
, V2SF_type_node
,
11143 tree_cons (NULL_TREE
,
11147 tree void_ftype_pchar
11148 = build_function_type (void_type_node
,
11149 tree_cons (NULL_TREE
, pchar_type_node
,
11152 /* Add all builtins that are more or less simple operations on two
11154 for (i
= 0, d
= bdesc_2arg
; i
< sizeof (bdesc_2arg
) / sizeof *d
; i
++, d
++)
11156 /* Use one of the operands; the target can have a different mode for
11157 mask-generating compares. */
11158 enum machine_mode mode
;
11163 mode
= insn_data
[d
->icode
].operand
[1].mode
;
11168 type
= v4sf_ftype_v4sf_v4sf
;
11171 type
= v8qi_ftype_v8qi_v8qi
;
11174 type
= v4hi_ftype_v4hi_v4hi
;
11177 type
= v2si_ftype_v2si_v2si
;
11180 type
= ti_ftype_ti_ti
;
11183 type
= di_ftype_di_di
;
11190 /* Override for comparisons. */
11191 if (d
->icode
== CODE_FOR_maskcmpv4sf3
11192 || d
->icode
== CODE_FOR_maskncmpv4sf3
11193 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
11194 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
)
11195 type
= v4si_ftype_v4sf_v4sf
;
11197 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
11200 /* Add the remaining MMX insns with somewhat more complicated types. */
11201 def_builtin (MASK_MMX
, "__builtin_ia32_m_from_int", v2si_ftype_int
, IX86_BUILTIN_M_FROM_INT
);
11202 def_builtin (MASK_MMX
, "__builtin_ia32_m_to_int", int_ftype_v2si
, IX86_BUILTIN_M_TO_INT
);
11203 def_builtin (MASK_MMX
, "__builtin_ia32_mmx_zero", di_ftype_void
, IX86_BUILTIN_MMX_ZERO
);
11204 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
11205 def_builtin (MASK_MMX
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
11206 def_builtin (MASK_MMX
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
11207 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
11208 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
11209 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
11211 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
11212 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
11213 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
11215 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
11216 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
11218 def_builtin (MASK_MMX
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
11219 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
11221 /* comi/ucomi insns. */
11222 for (i
= 0, d
= bdesc_comi
; i
< sizeof (bdesc_comi
) / sizeof *d
; i
++, d
++)
11223 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
11225 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
11226 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
11227 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
11229 def_builtin (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
11230 def_builtin (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
11231 def_builtin (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
11232 def_builtin (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
11233 def_builtin (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
11234 def_builtin (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
11236 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pextrw", int_ftype_v4hi_int
, IX86_BUILTIN_PEXTRW
);
11237 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int
, IX86_BUILTIN_PINSRW
);
11239 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
11241 def_builtin (MASK_SSE
, "__builtin_ia32_loadaps", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADAPS
);
11242 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADUPS
);
11243 def_builtin (MASK_SSE
, "__builtin_ia32_loadss", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADSS
);
11244 def_builtin (MASK_SSE
, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREAPS
);
11245 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
11246 def_builtin (MASK_SSE
, "__builtin_ia32_storess", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORESS
);
11248 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
11249 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
11250 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
11251 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
11253 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
11254 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
11255 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
11256 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
11258 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
11259 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_prefetch", void_ftype_pchar_int
, IX86_BUILTIN_PREFETCH
);
11261 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
11263 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
11264 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
11265 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
11266 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
11267 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
11268 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
11270 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
11272 /* Original 3DNow! */
11273 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
11274 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
11275 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
11276 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
11277 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
11278 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
11279 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
11280 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
11281 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
11282 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
11283 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
11284 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
11285 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
11286 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
11287 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
11288 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
11289 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
11290 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
11291 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
11292 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
11293 def_builtin (MASK_3DNOW
, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar
, IX86_BUILTIN_PREFETCH_3DNOW
);
11294 def_builtin (MASK_3DNOW
, "__builtin_ia32_prefetchw", void_ftype_pchar
, IX86_BUILTIN_PREFETCHW
);
11296 /* 3DNow! extension as used in the Athlon CPU. */
11297 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
11298 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
11299 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
11300 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
11301 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
11302 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
11304 /* Composite intrinsics. */
11305 def_builtin (MASK_SSE
, "__builtin_ia32_setps1", v4sf_ftype_float
, IX86_BUILTIN_SETPS1
);
11306 def_builtin (MASK_SSE
, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float
, IX86_BUILTIN_SETPS
);
11307 def_builtin (MASK_SSE
, "__builtin_ia32_setzerops", ti_ftype_void
, IX86_BUILTIN_CLRPS
);
11308 def_builtin (MASK_SSE
, "__builtin_ia32_loadps1", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADPS1
);
11309 def_builtin (MASK_SSE
, "__builtin_ia32_loadrps", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADRPS
);
11310 def_builtin (MASK_SSE
, "__builtin_ia32_storeps1", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREPS1
);
11311 def_builtin (MASK_SSE
, "__builtin_ia32_storerps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORERPS
);
11314 /* Errors in the source file can cause expand_expr to return const0_rtx
11315 where we expect a vector. To avoid crashing, use one of the vector
11316 clear instructions. */
11318 safe_vector_operand (x
, mode
)
11320 enum machine_mode mode
;
11322 if (x
!= const0_rtx
)
11324 x
= gen_reg_rtx (mode
);
11326 if (VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
))
11327 emit_insn (gen_mmx_clrdi (mode
== DImode
? x
11328 : gen_rtx_SUBREG (DImode
, x
, 0)));
11330 emit_insn (gen_sse_clrti (mode
== TImode
? x
11331 : gen_rtx_SUBREG (TImode
, x
, 0)));
11335 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
11338 ix86_expand_binop_builtin (icode
, arglist
, target
)
11339 enum insn_code icode
;
11344 tree arg0
= TREE_VALUE (arglist
);
11345 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11346 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11347 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11348 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
11349 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
11350 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
11352 if (VECTOR_MODE_P (mode0
))
11353 op0
= safe_vector_operand (op0
, mode0
);
11354 if (VECTOR_MODE_P (mode1
))
11355 op1
= safe_vector_operand (op1
, mode1
);
11358 || GET_MODE (target
) != tmode
11359 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
11360 target
= gen_reg_rtx (tmode
);
11362 /* In case the insn wants input operands in modes different from
11363 the result, abort. */
11364 if (GET_MODE (op0
) != mode0
|| GET_MODE (op1
) != mode1
)
11367 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
11368 op0
= copy_to_mode_reg (mode0
, op0
);
11369 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
11370 op1
= copy_to_mode_reg (mode1
, op1
);
11372 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
11379 /* Subroutine of ix86_expand_builtin to take care of stores. */
11382 ix86_expand_store_builtin (icode
, arglist
, shuffle
)
11383 enum insn_code icode
;
11388 tree arg0
= TREE_VALUE (arglist
);
11389 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11390 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11391 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11392 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
11393 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
11395 if (VECTOR_MODE_P (mode1
))
11396 op1
= safe_vector_operand (op1
, mode1
);
11398 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
11399 if (shuffle
>= 0 || ! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
11400 op1
= copy_to_mode_reg (mode1
, op1
);
11402 emit_insn (gen_sse_shufps (op1
, op1
, op1
, GEN_INT (shuffle
)));
11403 pat
= GEN_FCN (icode
) (op0
, op1
);
11409 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
11412 ix86_expand_unop_builtin (icode
, arglist
, target
, do_load
)
11413 enum insn_code icode
;
11419 tree arg0
= TREE_VALUE (arglist
);
11420 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11421 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
11422 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
11425 || GET_MODE (target
) != tmode
11426 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
11427 target
= gen_reg_rtx (tmode
);
11429 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
11432 if (VECTOR_MODE_P (mode0
))
11433 op0
= safe_vector_operand (op0
, mode0
);
11435 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
11436 op0
= copy_to_mode_reg (mode0
, op0
);
11439 pat
= GEN_FCN (icode
) (target
, op0
);
11446 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
11447 sqrtss, rsqrtss, rcpss. */
11450 ix86_expand_unop1_builtin (icode
, arglist
, target
)
11451 enum insn_code icode
;
11456 tree arg0
= TREE_VALUE (arglist
);
11457 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11458 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
11459 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
11462 || GET_MODE (target
) != tmode
11463 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
11464 target
= gen_reg_rtx (tmode
);
11466 if (VECTOR_MODE_P (mode0
))
11467 op0
= safe_vector_operand (op0
, mode0
);
11469 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
11470 op0
= copy_to_mode_reg (mode0
, op0
);
11472 pat
= GEN_FCN (icode
) (target
, op0
, op0
);
11479 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
11482 ix86_expand_sse_compare (d
, arglist
, target
)
11483 const struct builtin_description
*d
;
11488 tree arg0
= TREE_VALUE (arglist
);
11489 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11490 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11491 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11493 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
11494 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
11495 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
11496 enum rtx_code comparison
= d
->comparison
;
11498 if (VECTOR_MODE_P (mode0
))
11499 op0
= safe_vector_operand (op0
, mode0
);
11500 if (VECTOR_MODE_P (mode1
))
11501 op1
= safe_vector_operand (op1
, mode1
);
11503 /* Swap operands if we have a comparison that isn't available in
11507 rtx tmp
= gen_reg_rtx (mode1
);
11508 emit_move_insn (tmp
, op1
);
11514 || GET_MODE (target
) != tmode
11515 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
11516 target
= gen_reg_rtx (tmode
);
11518 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
11519 op0
= copy_to_mode_reg (mode0
, op0
);
11520 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
11521 op1
= copy_to_mode_reg (mode1
, op1
);
11523 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
11524 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
11531 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
11534 ix86_expand_sse_comi (d
, arglist
, target
)
11535 const struct builtin_description
*d
;
11540 tree arg0
= TREE_VALUE (arglist
);
11541 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11542 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11543 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11545 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
11546 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
11547 enum rtx_code comparison
= d
->comparison
;
11549 if (VECTOR_MODE_P (mode0
))
11550 op0
= safe_vector_operand (op0
, mode0
);
11551 if (VECTOR_MODE_P (mode1
))
11552 op1
= safe_vector_operand (op1
, mode1
);
11554 /* Swap operands if we have a comparison that isn't available in
11563 target
= gen_reg_rtx (SImode
);
11564 emit_move_insn (target
, const0_rtx
);
11565 target
= gen_rtx_SUBREG (QImode
, target
, 0);
11567 if (! (*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
11568 op0
= copy_to_mode_reg (mode0
, op0
);
11569 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
11570 op1
= copy_to_mode_reg (mode1
, op1
);
11572 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
11573 pat
= GEN_FCN (d
->icode
) (op0
, op1
, op2
);
11577 emit_insn (gen_setcc_2 (target
, op2
));
11582 /* Expand an expression EXP that calls a built-in function,
11583 with result going to TARGET if that's convenient
11584 (and in mode MODE if that's convenient).
11585 SUBTARGET may be used as the target for computing one of EXP's operands.
11586 IGNORE is nonzero if the value is to be ignored. */
11589 ix86_expand_builtin (exp
, target
, subtarget
, mode
, ignore
)
11592 rtx subtarget ATTRIBUTE_UNUSED
;
11593 enum machine_mode mode ATTRIBUTE_UNUSED
;
11594 int ignore ATTRIBUTE_UNUSED
;
11596 const struct builtin_description
*d
;
11598 enum insn_code icode
;
11599 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
11600 tree arglist
= TREE_OPERAND (exp
, 1);
11601 tree arg0
, arg1
, arg2
, arg3
;
11602 rtx op0
, op1
, op2
, pat
;
11603 enum machine_mode tmode
, mode0
, mode1
, mode2
;
11604 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
11608 case IX86_BUILTIN_EMMS
:
11609 emit_insn (gen_emms ());
11612 case IX86_BUILTIN_SFENCE
:
11613 emit_insn (gen_sfence ());
11616 case IX86_BUILTIN_M_FROM_INT
:
11617 target
= gen_reg_rtx (DImode
);
11618 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
11619 emit_move_insn (gen_rtx_SUBREG (SImode
, target
, 0), op0
);
11622 case IX86_BUILTIN_M_TO_INT
:
11623 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
11624 op0
= copy_to_mode_reg (DImode
, op0
);
11625 target
= gen_reg_rtx (SImode
);
11626 emit_move_insn (target
, gen_rtx_SUBREG (SImode
, op0
, 0));
11629 case IX86_BUILTIN_PEXTRW
:
11630 icode
= CODE_FOR_mmx_pextrw
;
11631 arg0
= TREE_VALUE (arglist
);
11632 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11633 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11634 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11635 tmode
= insn_data
[icode
].operand
[0].mode
;
11636 mode0
= insn_data
[icode
].operand
[1].mode
;
11637 mode1
= insn_data
[icode
].operand
[2].mode
;
11639 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
11640 op0
= copy_to_mode_reg (mode0
, op0
);
11641 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
11643 /* @@@ better error message */
11644 error ("selector must be an immediate");
11648 || GET_MODE (target
) != tmode
11649 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
11650 target
= gen_reg_rtx (tmode
);
11651 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
11657 case IX86_BUILTIN_PINSRW
:
11658 icode
= CODE_FOR_mmx_pinsrw
;
11659 arg0
= TREE_VALUE (arglist
);
11660 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11661 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
11662 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11663 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11664 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
11665 tmode
= insn_data
[icode
].operand
[0].mode
;
11666 mode0
= insn_data
[icode
].operand
[1].mode
;
11667 mode1
= insn_data
[icode
].operand
[2].mode
;
11668 mode2
= insn_data
[icode
].operand
[3].mode
;
11670 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
11671 op0
= copy_to_mode_reg (mode0
, op0
);
11672 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
11673 op1
= copy_to_mode_reg (mode1
, op1
);
11674 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
11676 /* @@@ better error message */
11677 error ("selector must be an immediate");
11681 || GET_MODE (target
) != tmode
11682 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
11683 target
= gen_reg_rtx (tmode
);
11684 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
11690 case IX86_BUILTIN_MASKMOVQ
:
11691 icode
= CODE_FOR_mmx_maskmovq
;
11692 /* Note the arg order is different from the operand order. */
11693 arg1
= TREE_VALUE (arglist
);
11694 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
11695 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
11696 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11697 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11698 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
11699 mode0
= insn_data
[icode
].operand
[0].mode
;
11700 mode1
= insn_data
[icode
].operand
[1].mode
;
11701 mode2
= insn_data
[icode
].operand
[2].mode
;
11703 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
11704 op0
= copy_to_mode_reg (mode0
, op0
);
11705 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
11706 op1
= copy_to_mode_reg (mode1
, op1
);
11707 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
11708 op2
= copy_to_mode_reg (mode2
, op2
);
11709 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
11715 case IX86_BUILTIN_SQRTSS
:
11716 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2
, arglist
, target
);
11717 case IX86_BUILTIN_RSQRTSS
:
11718 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2
, arglist
, target
);
11719 case IX86_BUILTIN_RCPSS
:
11720 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2
, arglist
, target
);
11722 case IX86_BUILTIN_LOADAPS
:
11723 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps
, arglist
, target
, 1);
11725 case IX86_BUILTIN_LOADUPS
:
11726 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
11728 case IX86_BUILTIN_STOREAPS
:
11729 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
, -1);
11730 case IX86_BUILTIN_STOREUPS
:
11731 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
, -1);
11733 case IX86_BUILTIN_LOADSS
:
11734 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss
, arglist
, target
, 1);
11736 case IX86_BUILTIN_STORESS
:
11737 return ix86_expand_store_builtin (CODE_FOR_sse_storess
, arglist
, -1);
11739 case IX86_BUILTIN_LOADHPS
:
11740 case IX86_BUILTIN_LOADLPS
:
11741 icode
= (fcode
== IX86_BUILTIN_LOADHPS
11742 ? CODE_FOR_sse_movhps
: CODE_FOR_sse_movlps
);
11743 arg0
= TREE_VALUE (arglist
);
11744 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11745 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11746 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11747 tmode
= insn_data
[icode
].operand
[0].mode
;
11748 mode0
= insn_data
[icode
].operand
[1].mode
;
11749 mode1
= insn_data
[icode
].operand
[2].mode
;
11751 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
11752 op0
= copy_to_mode_reg (mode0
, op0
);
11753 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
11755 || GET_MODE (target
) != tmode
11756 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
11757 target
= gen_reg_rtx (tmode
);
11758 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
11764 case IX86_BUILTIN_STOREHPS
:
11765 case IX86_BUILTIN_STORELPS
:
11766 icode
= (fcode
== IX86_BUILTIN_STOREHPS
11767 ? CODE_FOR_sse_movhps
: CODE_FOR_sse_movlps
);
11768 arg0
= TREE_VALUE (arglist
);
11769 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11770 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11771 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11772 mode0
= insn_data
[icode
].operand
[1].mode
;
11773 mode1
= insn_data
[icode
].operand
[2].mode
;
11775 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
11776 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
11777 op1
= copy_to_mode_reg (mode1
, op1
);
11779 pat
= GEN_FCN (icode
) (op0
, op0
, op1
);
11785 case IX86_BUILTIN_MOVNTPS
:
11786 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
, -1);
11787 case IX86_BUILTIN_MOVNTQ
:
11788 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
, -1);
11790 case IX86_BUILTIN_LDMXCSR
:
11791 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
11792 target
= assign_386_stack_local (SImode
, 0);
11793 emit_move_insn (target
, op0
);
11794 emit_insn (gen_ldmxcsr (target
));
11797 case IX86_BUILTIN_STMXCSR
:
11798 target
= assign_386_stack_local (SImode
, 0);
11799 emit_insn (gen_stmxcsr (target
));
11800 return copy_to_mode_reg (SImode
, target
);
11802 case IX86_BUILTIN_PREFETCH
:
11803 icode
= CODE_FOR_prefetch
;
11804 arg0
= TREE_VALUE (arglist
);
11805 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11806 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11807 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11808 mode0
= insn_data
[icode
].operand
[0].mode
;
11809 mode1
= insn_data
[icode
].operand
[1].mode
;
11811 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
11813 /* @@@ better error message */
11814 error ("selector must be an immediate");
11818 op0
= copy_to_mode_reg (Pmode
, op0
);
11819 pat
= GEN_FCN (icode
) (op0
, op1
);
11825 case IX86_BUILTIN_SHUFPS
:
11826 icode
= CODE_FOR_sse_shufps
;
11827 arg0
= TREE_VALUE (arglist
);
11828 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11829 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
11830 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11831 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11832 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
11833 tmode
= insn_data
[icode
].operand
[0].mode
;
11834 mode0
= insn_data
[icode
].operand
[1].mode
;
11835 mode1
= insn_data
[icode
].operand
[2].mode
;
11836 mode2
= insn_data
[icode
].operand
[3].mode
;
11838 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
11839 op0
= copy_to_mode_reg (mode0
, op0
);
11840 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
11841 op1
= copy_to_mode_reg (mode1
, op1
);
11842 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
11844 /* @@@ better error message */
11845 error ("mask must be an immediate");
11849 || GET_MODE (target
) != tmode
11850 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
11851 target
= gen_reg_rtx (tmode
);
11852 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
11858 case IX86_BUILTIN_PSHUFW
:
11859 icode
= CODE_FOR_mmx_pshufw
;
11860 arg0
= TREE_VALUE (arglist
);
11861 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11862 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11863 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11864 tmode
= insn_data
[icode
].operand
[0].mode
;
11865 mode0
= insn_data
[icode
].operand
[2].mode
;
11866 mode1
= insn_data
[icode
].operand
[3].mode
;
11868 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
11869 op0
= copy_to_mode_reg (mode0
, op0
);
11870 if (! (*insn_data
[icode
].operand
[3].predicate
) (op1
, mode1
))
11872 /* @@@ better error message */
11873 error ("mask must be an immediate");
11877 || GET_MODE (target
) != tmode
11878 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
11879 target
= gen_reg_rtx (tmode
);
11880 pat
= GEN_FCN (icode
) (target
, target
, op0
, op1
);
11886 case IX86_BUILTIN_FEMMS
:
11887 emit_insn (gen_femms ());
11890 case IX86_BUILTIN_PAVGUSB
:
11891 return ix86_expand_binop_builtin (CODE_FOR_pavgusb
, arglist
, target
);
11893 case IX86_BUILTIN_PF2ID
:
11894 return ix86_expand_unop_builtin (CODE_FOR_pf2id
, arglist
, target
, 0);
11896 case IX86_BUILTIN_PFACC
:
11897 return ix86_expand_binop_builtin (CODE_FOR_pfacc
, arglist
, target
);
11899 case IX86_BUILTIN_PFADD
:
11900 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3
, arglist
, target
);
11902 case IX86_BUILTIN_PFCMPEQ
:
11903 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3
, arglist
, target
);
11905 case IX86_BUILTIN_PFCMPGE
:
11906 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3
, arglist
, target
);
11908 case IX86_BUILTIN_PFCMPGT
:
11909 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3
, arglist
, target
);
11911 case IX86_BUILTIN_PFMAX
:
11912 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3
, arglist
, target
);
11914 case IX86_BUILTIN_PFMIN
:
11915 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3
, arglist
, target
);
11917 case IX86_BUILTIN_PFMUL
:
11918 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3
, arglist
, target
);
11920 case IX86_BUILTIN_PFRCP
:
11921 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2
, arglist
, target
, 0);
11923 case IX86_BUILTIN_PFRCPIT1
:
11924 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3
, arglist
, target
);
11926 case IX86_BUILTIN_PFRCPIT2
:
11927 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3
, arglist
, target
);
11929 case IX86_BUILTIN_PFRSQIT1
:
11930 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3
, arglist
, target
);
11932 case IX86_BUILTIN_PFRSQRT
:
11933 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2
, arglist
, target
, 0);
11935 case IX86_BUILTIN_PFSUB
:
11936 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3
, arglist
, target
);
11938 case IX86_BUILTIN_PFSUBR
:
11939 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3
, arglist
, target
);
11941 case IX86_BUILTIN_PI2FD
:
11942 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2
, arglist
, target
, 0);
11944 case IX86_BUILTIN_PMULHRW
:
11945 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3
, arglist
, target
);
11947 case IX86_BUILTIN_PREFETCH_3DNOW
:
11948 icode
= CODE_FOR_prefetch_3dnow
;
11949 arg0
= TREE_VALUE (arglist
);
11950 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11951 mode0
= insn_data
[icode
].operand
[0].mode
;
11952 pat
= GEN_FCN (icode
) (copy_to_mode_reg (Pmode
, op0
));
11958 case IX86_BUILTIN_PREFETCHW
:
11959 icode
= CODE_FOR_prefetchw
;
11960 arg0
= TREE_VALUE (arglist
);
11961 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11962 mode0
= insn_data
[icode
].operand
[0].mode
;
11963 pat
= GEN_FCN (icode
) (copy_to_mode_reg (Pmode
, op0
));
11969 case IX86_BUILTIN_PF2IW
:
11970 return ix86_expand_unop_builtin (CODE_FOR_pf2iw
, arglist
, target
, 0);
11972 case IX86_BUILTIN_PFNACC
:
11973 return ix86_expand_binop_builtin (CODE_FOR_pfnacc
, arglist
, target
);
11975 case IX86_BUILTIN_PFPNACC
:
11976 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc
, arglist
, target
);
11978 case IX86_BUILTIN_PI2FW
:
11979 return ix86_expand_unop_builtin (CODE_FOR_pi2fw
, arglist
, target
, 0);
11981 case IX86_BUILTIN_PSWAPDSI
:
11982 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2
, arglist
, target
, 0);
11984 case IX86_BUILTIN_PSWAPDSF
:
11985 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2
, arglist
, target
, 0);
11987 /* Composite intrinsics. */
11988 case IX86_BUILTIN_SETPS1
:
11989 target
= assign_386_stack_local (SFmode
, 0);
11990 arg0
= TREE_VALUE (arglist
);
11991 emit_move_insn (adjust_address (target
, SFmode
, 0),
11992 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
11993 op0
= gen_reg_rtx (V4SFmode
);
11994 emit_insn (gen_sse_loadss (op0
, adjust_address (target
, V4SFmode
, 0)));
11995 emit_insn (gen_sse_shufps (op0
, op0
, op0
, GEN_INT (0)));
11998 case IX86_BUILTIN_SETPS
:
11999 target
= assign_386_stack_local (V4SFmode
, 0);
12000 arg0
= TREE_VALUE (arglist
);
12001 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12002 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
12003 arg3
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
12004 emit_move_insn (adjust_address (target
, SFmode
, 0),
12005 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
12006 emit_move_insn (adjust_address (target
, SFmode
, 4),
12007 expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0));
12008 emit_move_insn (adjust_address (target
, SFmode
, 8),
12009 expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0));
12010 emit_move_insn (adjust_address (target
, SFmode
, 12),
12011 expand_expr (arg3
, NULL_RTX
, VOIDmode
, 0));
12012 op0
= gen_reg_rtx (V4SFmode
);
12013 emit_insn (gen_sse_movaps (op0
, target
));
12016 case IX86_BUILTIN_CLRPS
:
12017 target
= gen_reg_rtx (TImode
);
12018 emit_insn (gen_sse_clrti (target
));
12021 case IX86_BUILTIN_LOADRPS
:
12022 target
= ix86_expand_unop_builtin (CODE_FOR_sse_movaps
, arglist
,
12023 gen_reg_rtx (V4SFmode
), 1);
12024 emit_insn (gen_sse_shufps (target
, target
, target
, GEN_INT (0x1b)));
12027 case IX86_BUILTIN_LOADPS1
:
12028 target
= ix86_expand_unop_builtin (CODE_FOR_sse_loadss
, arglist
,
12029 gen_reg_rtx (V4SFmode
), 1);
12030 emit_insn (gen_sse_shufps (target
, target
, target
, const0_rtx
));
12033 case IX86_BUILTIN_STOREPS1
:
12034 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
, 0);
12035 case IX86_BUILTIN_STORERPS
:
12036 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
, 0x1B);
12038 case IX86_BUILTIN_MMX_ZERO
:
12039 target
= gen_reg_rtx (DImode
);
12040 emit_insn (gen_mmx_clrdi (target
));
12047 for (i
= 0, d
= bdesc_2arg
; i
< sizeof (bdesc_2arg
) / sizeof *d
; i
++, d
++)
12048 if (d
->code
== fcode
)
12050 /* Compares are treated specially. */
12051 if (d
->icode
== CODE_FOR_maskcmpv4sf3
12052 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
12053 || d
->icode
== CODE_FOR_maskncmpv4sf3
12054 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
)
12055 return ix86_expand_sse_compare (d
, arglist
, target
);
12057 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
12060 for (i
= 0, d
= bdesc_1arg
; i
< sizeof (bdesc_1arg
) / sizeof *d
; i
++, d
++)
12061 if (d
->code
== fcode
)
12062 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
12064 for (i
= 0, d
= bdesc_comi
; i
< sizeof (bdesc_comi
) / sizeof *d
; i
++, d
++)
12065 if (d
->code
== fcode
)
12066 return ix86_expand_sse_comi (d
, arglist
, target
);
12068 /* @@@ Should really do something sensible here. */
12072 /* Store OPERAND to the memory after reload is completed. This means
12073 that we can't easilly use assign_stack_local. */
/* NOTE(review): this region is an extraction-mangled fragment; the
   return type (rtx), local declarations, braces and several interior
   statements of the original function are missing from this view.
   Comments below annotate only what the visible code shows.  */
12075 ix86_force_to_memory (mode
, operand
)
12076 enum machine_mode mode
;
/* Sanity: this helper is only meaningful after reload has run.  */
12080 if (!reload_completed
)
/* 64-bit with a red zone: store directly below the stack pointer
   (offset -RED_ZONE_SIZE); no stack adjustment is needed.  */
12082 if (TARGET_64BIT
&& TARGET_RED_ZONE
)
12084 result
= gen_rtx_MEM (mode
,
12085 gen_rtx_PLUS (Pmode
,
12087 GEN_INT (-RED_ZONE_SIZE
)));
12088 emit_move_insn (result
, operand
);
/* 64-bit without a red zone: push the value via a pre-decrement of
   the stack pointer, then reference the new stack top.  */
12090 else if (TARGET_64BIT
&& !TARGET_RED_ZONE
)
12096 operand
= gen_lowpart (DImode
, operand
);
12100 gen_rtx_SET (VOIDmode
,
12101 gen_rtx_MEM (DImode
,
12102 gen_rtx_PRE_DEC (DImode
,
12103 stack_pointer_rtx
)),
12109 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
/* Presumably the 32-bit DImode case (the enclosing switch/case lines
   are elided -- TODO confirm): split into two SImode halves and push
   each half separately.  */
12118 split_di (&operand
, 1, operands
, operands
+ 1);
12120 gen_rtx_SET (VOIDmode
,
12121 gen_rtx_MEM (SImode
,
12122 gen_rtx_PRE_DEC (Pmode
,
12123 stack_pointer_rtx
)),
12126 gen_rtx_SET (VOIDmode
,
12127 gen_rtx_MEM (SImode
,
12128 gen_rtx_PRE_DEC (Pmode
,
12129 stack_pointer_rtx
)),
12134 /* It is better to store HImodes as SImodes. */
12135 if (!TARGET_PARTIAL_REG_STALL
)
12136 operand
= gen_lowpart (SImode
, operand
);
/* Push the (possibly SImode-widened) operand.  */
12140 gen_rtx_SET (VOIDmode
,
12141 gen_rtx_MEM (GET_MODE (operand
),
12142 gen_rtx_PRE_DEC (SImode
,
12143 stack_pointer_rtx
)),
/* The result in the push cases is a MEM at the new top of stack.  */
12149 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
12154 /* Free operand from the memory. */
/* NOTE(review): releases the stack slot pushed by
   ix86_force_to_memory.  With a 64-bit red zone no stack adjustment
   ever happened, so the guard below skips the deallocation.  Several
   interior lines (size selection, braces) are elided from this view.  */
12156 ix86_free_from_memory (mode
)
12157 enum machine_mode mode
;
12159 if (!TARGET_64BIT
|| !TARGET_RED_ZONE
)
/* Slot size selection: DImode or 64-bit pushes take 8 bytes,
   HImode with partial-reg stalls stays 2 -- the assignments in each
   branch are elided; TODO confirm against full source.  */
12163 if (mode
== DImode
|| TARGET_64BIT
)
12165 else if (mode
== HImode
&& TARGET_PARTIAL_REG_STALL
)
12169 /* Use LEA to deallocate stack space. In peephole2 it will be converted
12170 to pop or add instruction if registers are available. */
12171 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
12172 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
12177 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
12178 QImode must go into class Q_REGS.
12179 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
12180 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): the return type and several return statements of the
   elided branches are missing from this view; comments below annotate
   only the visible conditions.  */
12182 ix86_preferred_reload_class (x
, class)
12184 enum reg_class
class;
/* Special-case floating-point constants (CONST_DOUBLE whose mode is
   not VOIDmode, i.e. not an integer CONST_DOUBLE).  */
12186 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
12188 /* SSE can't load any constant directly yet. */
12189 if (SSE_CLASS_P (class))
12191 /* Floats can load 0 and 1. */
12192 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x
))
12194 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
12195 if (MAYBE_SSE_CLASS_P (class))
12196 return (reg_class_subset_p (class, GENERAL_REGS
)
12197 ? GENERAL_REGS
: FLOAT_REGS
);
12201 /* General regs can load everything. */
12202 if (reg_class_subset_p (class, GENERAL_REGS
))
12203 return GENERAL_REGS
;
12204 /* In case we haven't resolved FLOAT or SSE yet, give up. */
12205 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX registers cannot load constants directly either (the return
   value for this branch is elided from this view).  */
12208 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x
))
/* QImode values must end up in a class with QI register parts; the
   return for this branch is elided -- presumably Q_REGS per the
   header comment.  */
12210 if (GET_MODE (x
) == QImode
&& ! reg_class_subset_p (class, Q_REGS
))
12215 /* If we are copying between general and FP registers, we need a memory
12216 location. The same is true for SSE and MMX registers.
12218 The macro can't work reliably when one of the CLASSES is class containing
12219 registers from multiple units (SSE, MMX, integer). We avoid this by never
12220 combining those units in single alternative in the machine description.
12221 Ensure that this constraint holds to avoid unexpected surprises.
12223 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
12224 enforce these sanity checks. */
/* NOTE(review): the return type and the action taken when the sanity
   check below fires are elided from this view.  */
12226 ix86_secondary_memory_needed (class1
, class2
, mode
, strict
)
12227 enum reg_class class1
, class2
;
12228 enum machine_mode mode
;
/* Sanity check: each class must belong to exactly one unit (x87, SSE,
   MMX or integer); a "maybe" class that is not purely that unit makes
   the answer ambiguous.  */
12231 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
12232 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
12233 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
12234 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
12235 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
12236 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
/* Memory is required when crossing the x87 boundary in any mode, or
   when crossing the SSE or MMX boundary in any mode other than
   SImode (SImode can presumably move directly -- TODO confirm).  */
12243 return (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
)
12244 || (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
)
12245 && (mode
) != SImode
)
12246 || (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
12247 && (mode
) != SImode
));
12249 /* Return the cost of moving data from a register in class CLASS1 to
12250 one in class CLASS2.
12252 It is not required that the cost always equal 2 when FROM is the same as TO;
12253 on some machines it is expensive to move between registers if they are not
12254 general registers. */
/* NOTE(review): the return type and the "cost 20" branch body named in
   the comment below are elided from this view.  */
12256 ix86_register_move_cost (mode
, class1
, class2
)
12257 enum machine_mode mode
;
12258 enum reg_class class1
, class2
;
12260 /* In case we require secondary memory, compute cost of the store followed
12261 by load. In case of copying from general_purpose_register we may emit
12262 multiple stores followed by single load causing memory size mismatch
12263 stall. Count this as arbitarily high cost of 20. */
12264 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
/* When the source class needs more hard regs than the destination,
   the elided branch applies the high fixed cost; otherwise fall
   through to store-cost + load-cost (+ one add).  */
12267 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
12269 return (MEMORY_MOVE_COST (mode
, class1
, 0)
12270 + MEMORY_MOVE_COST (mode
, class2
, 1) + add_cost
);
12272 /* Moves between SSE/MMX and integer unit are expensive. */
12273 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
12274 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
12275 return ix86_cost
->mmxsse_to_integer
;
/* Same-unit moves: take the per-unit cost from the tuning table.  */
12276 if (MAYBE_FLOAT_CLASS_P (class1
))
12277 return ix86_cost
->fp_move
;
12278 if (MAYBE_SSE_CLASS_P (class1
))
12279 return ix86_cost
->sse_move
;
12280 if (MAYBE_MMX_CLASS_P (class1
))
12281 return ix86_cost
->mmx_move
;
12285 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): the return type, REGNO parameter declaration, and the
   returns of several elided branches are missing from this view.  */
12287 ix86_hard_regno_mode_ok (regno
, mode
)
12289 enum machine_mode mode
;
12291 /* Flags and only flags can only hold CCmode values. */
12292 if (CC_REGNO_P (regno
))
12293 return GET_MODE_CLASS (mode
) == MODE_CC
;
/* Conversely, no non-flags register may hold CC, RANDOM or
   PARTIAL_INT modes (the return for this branch is elided).  */
12294 if (GET_MODE_CLASS (mode
) == MODE_CC
12295 || GET_MODE_CLASS (mode
) == MODE_RANDOM
12296 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
/* Per-unit checks: x87, SSE and MMX registers each accept only their
   own valid mode sets.  */
12298 if (FP_REGNO_P (regno
))
12299 return VALID_FP_MODE_P (mode
);
12300 if (SSE_REGNO_P (regno
))
12301 return VALID_SSE_REG_MODE (mode
);
12302 if (MMX_REGNO_P (regno
))
12303 return VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
);
12304 /* We handle both integer and floats in the general purpose registers.
12305 In future we should be able to handle vector modes as well. */
12306 if (!VALID_INT_MODE_P (mode
) && !VALID_FP_MODE_P (mode
))
12308 /* Take care for QImode values - they can be in non-QI regs, but then
12309 they do cause partial register stalls. */
12310 if (regno
< 4 || mode
!= QImode
|| TARGET_64BIT
)
/* Fall-through: QImode in a register >= 4 on 32-bit -- allow it only
   in/after reload, or when the target has no partial-reg stall.  */
12312 return reload_in_progress
|| reload_completed
|| !TARGET_PARTIAL_REG_STALL
;
12315 /* Return the cost of moving data of mode M between a
12316 register and memory. A value of 2 is the default; this cost is
12317 relative to those in `REGISTER_MOVE_COST'.
12319 If moving between registers and memory is more expensive than
12320 between two registers, you should define this macro to express the
12323 Model also increased moving costs of QImode registers in non
12327 ix86_memory_move_cost (mode
, class, in
)
12328 enum machine_mode mode
;
12329 enum reg_class
class;
/* NOTE(review): the switch-case labels that compute INDEX from the
   mode size are elided throughout this view; each unit's branch ends
   in a table lookup of load vs. store cost selected by IN.  */
/* x87 register class: cost from fp_load/fp_store tables.  */
12332 if (FLOAT_CLASS_P (class))
12350 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
/* SSE register class: cost indexed by mode size.  */
12352 if (SSE_CLASS_P (class))
12355 switch (GET_MODE_SIZE (mode
))
12369 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
/* MMX register class: cost indexed by mode size.  */
12371 if (MMX_CLASS_P (class))
12374 switch (GET_MODE_SIZE (mode
))
12385 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
/* Integer registers.  QImode in a class without QI parts pays extra:
   a movzbl on load, and a constant penalty on store.  */
12387 switch (GET_MODE_SIZE (mode
))
12391 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
12392 : ix86_cost
->movzbl_load
);
12394 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
12395 : ix86_cost
->int_store
[0] + 4);
12398 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
12400 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
12401 if (mode
== TFmode
)
12403 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
12404 * (int) GET_MODE_SIZE (mode
) / 4);
12408 #ifdef DO_GLOBAL_CTORS_BODY
/* SVR3 global-constructor support: emit a "pushl $SYMBOL" into the
   output file so the startup code can invoke SYMBOL; PRIORITY is
   explicitly unused.  The return type and SYMBOL's declaration are
   elided from this view.  */
12410 ix86_svr3_asm_out_constructor (symbol
, priority
)
12412 int priority ATTRIBUTE_UNUSED
;
12415 fputs ("\tpushl $", asm_out_file
);
12416 assemble_name (asm_out_file
, XSTR (symbol
, 0));
12417 fputc ('\n', asm_out_file
);
12421 #if defined(TARGET_ELF) && defined(TARGET_COFF)
/* SCO supports both ELF and COFF output: emit the named-section
   directive via the matching default formatter.  The condition
   selecting between the two calls is elided from this view --
   presumably it tests the current object format; TODO confirm.  */
12423 sco_asm_named_section (name
, flags
)
12425 unsigned int flags
;
12428 default_elf_asm_named_section (name
, flags
);
12430 default_coff_asm_named_section (name
, flags
);
12434 sco_asm_out_constructor (symbol
, priority
)
12439 default_named_section_asm_out_constrctor (symbol
, priority
);
12441 ix86_svr3_asm_out_constructor (symbol
, priority
);