/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
   Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif
/* Processor costs (relative to an add) */
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,					/* cost of an add instruction */
  3,					/* cost of a lea instruction */
  2,					/* variable shift costs */
  3,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  3,					/* cost of a divide/mod */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
};
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  6,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  23,					/* cost of a divide/mod */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
};
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  12,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  40,					/* cost of a divide/mod */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3					/* MMX or SSE register to integer */
};
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  11,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  25,					/* cost of a divide/mod */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3					/* MMX or SSE register to integer */
};
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  4,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  17,					/* cost of a divide/mod */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3					/* MMX or SSE register to integer */
};
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  18,					/* cost of a divide/mod */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6					/* MMX or SSE register to integer */
};
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  5,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  42,					/* cost of a divide/mod */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 20},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 16},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6					/* MMX or SSE register to integer */
};
struct processor_costs pentium4_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  8,					/* variable shift costs */
  8,					/* constant shift costs */
  30,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  112,					/* cost of a divide/mod */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
};

struct processor_costs *ix86_cost = &pentium_cost;
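
/* Illustrative note (an assumption, not part of the original file): the
   tables above are consumed through the ix86_cost pointer, e.g. cost
   macros in i386.h dereference fields such as ix86_cost->add.
   override_options () below repoints ix86_cost at the table matching
   -mcpu=, or at size_cost when optimizing for size.  */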
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_3dnow_a = m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
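
/* Illustrative sketch (an assumption, not part of the original file):
   these masks are tested against the bit for the active CPU, so i386.h
   can define per-CPU feature tests along the lines of

     #define TARGET_USE_LEAVE (x86_use_leave & (1 << ix86_cpu))

   which makes e.g. -mcpu=k6 enable use of "leave" because m_K6 is set
   in x86_use_leave above.  The same pattern appears literally later in
   this file, e.g. the x86_3dnow_a and x86_accumulate_outgoing_args
   tests in override_options ().  */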
/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30
/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))

static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
						1 /*RDX*/, 2 /*RCX*/,
						FIRST_REX_INT_REG /*R8 */,
						FIRST_REX_INT_REG + 1 /*R9 */};
static int x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDX*/,
					     5 /*RDI*/, 4 /*RSI*/};
/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8 for %eip (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to
   understand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
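
/* Worked example (illustrative; assumes the usual x86-64 values
   REGPARM_MAX == 6, SSE_REGPARM_MAX == 8 and UNITS_PER_WORD == 8):
   the register save area is 6*8 + 8*16 = 176 bytes -- one word per
   integer argument register plus one 16-byte slot per SSE argument
   register.  */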
/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
   [va_arg registers]  (
		       > to_allocate	      <- FRAME_POINTER
   [frame]	       (
   [padding2]	       /
*/
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;

/* which cpu are we scheduling for */
enum processor_type ix86_cpu;

/* which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					  rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_free_machine_status PARAMS ((struct function *));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
					 rtx));
static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
					    rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					   rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static int ix86_save_reg PARAMS ((int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
const struct attribute_spec ix86_attribute_table[];
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));

#ifdef DO_GLOBAL_CTORS_BODY
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif
#if defined (TARGET_ELF) && defined (TARGET_COFF)
static void sco_asm_named_section PARAMS ((const char *, unsigned int));
static void sco_asm_out_constructor PARAMS ((rtx, int));
#endif
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
{
  X86_64_NO_CLASS,
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
  X86_64_SSE_CLASS,
  X86_64_SSESF_CLASS,
  X86_64_SSEDF_CLASS,
  X86_64_SSEUP_CLASS,
  X86_64_X87_CLASS,
  X86_64_X87UP_CLASS,
  X86_64_MEMORY_CLASS
};
const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
static void ix86_osf_output_function_prologue PARAMS ((FILE *,
						       HOST_WIDE_INT));
# undef TARGET_ASM_FUNCTION_PROLOGUE
# define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder

struct gcc_target targetm = TARGET_INITIALIZER;
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      struct processor_costs *cost;	/* Processor costs */
      int target_enable;		/* Target flags to enable.  */
      int target_disable;		/* Target flags to disable.  */
      int align_loop;			/* Default alignments.  */
      int align_jump;
      int align_func;
      int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 2, 2, 2, 1},
      {&i486_cost, 0, 0, 4, 4, 4, 1},
      {&pentium_cost, 0, 0, -4, -4, -4, 1},
      {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
      {&k6_cost, 0, 0, -5, -5, 4, 1},
      {&athlon_cost, 0, 0, 4, -4, 4, 1},
      {&pentium4_cost, 0, 0, 2, 2, 2, 1}
    };

  static struct pta
    {
      const char *name;		/* processor name or nickname.  */
      enum processor_type processor;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386},
      {"i486", PROCESSOR_I486},
      {"i586", PROCESSOR_PENTIUM},
      {"pentium", PROCESSOR_PENTIUM},
      {"i686", PROCESSOR_PENTIUMPRO},
      {"pentiumpro", PROCESSOR_PENTIUMPRO},
      {"k6", PROCESSOR_K6},
      {"athlon", PROCESSOR_ATHLON},
      {"pentium4", PROCESSOR_PENTIUM4},
    };

  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
  ix86_arch = PROCESSOR_I386;
  ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("Code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("Code model `%s' not supported in the %s bit mode.",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("Code model `large' not supported yet.");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in.",
	   (target_flags & MASK_64BIT) ? 64 : 32);
  if (ix86_arch_string != 0)
    {
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
	  {
	    ix86_arch = processor_alias_table[i].processor;
	    /* Default cpu tuning to the architecture.  */
	    ix86_cpu = ix86_arch;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -march= switch", ix86_arch_string);
    }

  if (ix86_cpu_string != 0)
    {
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
	  {
	    ix86_cpu = processor_alias_table[i].processor;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
    }

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;
  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;
  free_machine_status = ix86_free_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else
    if (TARGET_64BIT)
      ix86_regparm = REGPARM_MAX;
  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
#define abs(n) (n < 0 ? -n : n)
  if (align_loops == 0)
    align_loops = 1 << abs (processor_target_table[ix86_cpu].align_loop);
  if (align_jumps == 0)
    align_jumps = 1 << abs (processor_target_table[ix86_cpu].align_jump);
  if (align_functions == 0)
    align_functions = 1 << abs (processor_target_table[ix86_cpu].align_func);
  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128.  */
  ix86_preferred_stack_boundary = 128;
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 3 : 2) || i > 31)
	error ("-mpreferred-stack-boundary=%d is not between %d and 31", i,
	       TARGET_64BIT ? 3 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    target_flags |= MASK_MMX;

  /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow.  */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
	 extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
	target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & CPUMASK)
      && !(target_flags & MASK_NO_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
}
void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
  if (TARGET_64BIT && optimize >= 1)
    flag_omit_frame_pointer = 1;
  if (TARGET_64BIT)
    flag_pcc_struct_return = 0;
}
/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 1, 1, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 1, 1, false, false, false, ix86_handle_dll_attribute },
  { "shared", 1, 1, true, false, false, ix86_handle_shared_attribute },
#endif
  { NULL, 0, 0, false, false, false, NULL }
};
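
/* Usage sketch (illustrative, not part of the original file): in user
   code these attributes are written as, e.g.,

     int f (int a, int b) __attribute__ ((regparm (2)));

   asking for the first two integer arguments to be passed in registers;
   ix86_handle_regparm_attribute below checks that the argument is an
   integer constant no larger than REGPARM_MAX.  */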
/* Handle a "cdecl" or "stdcall" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args ATTRIBUTE_UNUSED;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  if (TARGET_64BIT)
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle a "regparm" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning ("`%s' attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning ("argument to `%s' attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}
#if defined (OSF_OS) || defined (TARGET_OSF1ELF)

/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int: how many units of
   temporary storage to allocate.

   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.

   We override it here to allow for the new profiling code to go before
   the prologue and the old mcount code to go after the prologue (and
   after %ebx has been set up for ELF shared library support).  */

static void
ix86_osf_output_function_prologue (file, size)
     FILE *file;
     HOST_WIDE_INT size;
{
  char *prefix = "";
  char *lprefix = LPREFIX;
  int labelno = profile_label_no;

#ifdef OSF_OS

  if (TARGET_UNDERSCORES)
    prefix = "_";

  if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic && !HALF_PIC_P ())
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else if (HALF_PIC_P ())
	{
	  rtx symref;

	  HALF_PIC_EXTERNAL ("_mcount_ptr");
	  symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
						     "_mcount_ptr"));

	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
		   XSTR (symref, 0));
	  fprintf (file, "\tcall *(%%eax)\n");
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }

#else  /* !OSF_OS */

  if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic)
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }
#endif /* !OSF_OS */

  function_prologue (file, size);
}

#endif /* OSF_OS || TARGET_OSF1ELF */
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall functions will pop the stack if not variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    if (rtd
	&& (TYPE_ARG_TYPES (funtype) == NULL_TREE
	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		== void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument.  */
  if (aggregate_value_p (TREE_TYPE (funtype))
      && !TARGET_64BIT)
    return GET_MODE_SIZE (Pmode);

  return 0;
}
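
/* Worked example (illustrative, not part of the original file): for

     void __attribute__ ((stdcall)) f (int a, int b);

   with 8 bytes of stack arguments and a fixed argument list, the
   function above returns 8, so the callee pops its own arguments
   (a "ret $8"); a plain cdecl function returns 0 and the caller
   pops.  */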
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */
bool
ix86_function_arg_regno_p (regno)
     int regno;
{
  int i;
  if (!TARGET_64BIT)
    return regno < REGPARM_MAX || (TARGET_SSE && SSE_REGNO_P (regno));
  if (SSE_REGNO_P (regno) && TARGET_SSE)
    return true;
  /* RAX is used as hidden argument to va_arg functions.  */
  if (!regno)
    return true;
  for (i = 0; i < REGPARM_MAX; i++)
    if (regno == x86_64_int_parameter_registers[i])
      return true;
  return false;
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  cum->sse_nregs = SSE_REGPARM_MAX;
  if (fntype && !TARGET_64BIT)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }
  cum->maybe_vaarg = false;

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers.  */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    {
	      if (!TARGET_64BIT)
		cum->nregs = 0;
	      cum->maybe_vaarg = true;
	    }
	}
    }
  if ((!fntype && !libname)
      || (fntype && !TYPE_ARG_TYPES (fntype)))
    cum->maybe_vaarg = 1;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8 bytes of incoming argument by the
   register class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (class1, class2)
     enum x86_64_reg_class class1, class2;
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
      || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies the
   offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */
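
/* Worked example (illustrative, not part of the original file):

     struct s { int a; int b; double d; };    16 bytes -> 2 words

   Word 0 carries the two ints (INTEGERSI for `a', INTEGER for `b' at
   bit offset 32; merge_classes combines them to INTEGER).  Word 1
   carries the double: SSEDF.  The struct is therefore passed in one
   integer register and one SSE register.  */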
static int
classify_argument (mode, type, classes, bit_offset)
     enum machine_mode mode;
     tree type;
     enum x86_64_reg_class classes[MAX_CLASSES];
     int bit_offset;
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
      if (bytes > 16)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signal the memory class, so handle it as a special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      if (TREE_CODE (type) == RECORD_TYPE)
	{
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = int_bit_position (field) / 8 / 8;
			   i < (int_bit_position (field)
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i] =
			  merge_classes (X86_64_INTEGER_CLASS,
					 classes[i]);
		    }
		  else
		    {
		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					       TREE_TYPE (field), subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      for (i = 0; i < num; i++)
			{
			  int pos =
			    (int_bit_position (field) + bit_offset) / 8 / 8;
			  classes[i + pos] =
			    merge_classes (subclasses[i], classes[i + pos]);
			}
		    }
		}
	    }
	}
      /* Arrays are handled as small records.  */
      else if (TREE_CODE (type) == ARRAY_TYPE)
	{
	  int num;
	  num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				   TREE_TYPE (type), subclasses, bit_offset);
	  if (!num)
	    return 0;

	  /* The partial classes are now full classes.  */
	  if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	    subclasses[0] = X86_64_SSE_CLASS;
	  if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
	    subclasses[0] = X86_64_INTEGER_CLASS;

	  for (i = 0; i < words; i++)
	    classes[i] = subclasses[i % num];
	}
      /* Unions are similar to RECORD_TYPE but offset is always 0.  */
      else if (TREE_CODE (type) == UNION_TYPE)
	{
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;
		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	}
      else
	abort ();

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
	    classes[i] = X86_64_SSE_CLASS;

	  /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
	    classes[i] = X86_64_SSE_CLASS;
	}
      return words;
    }
  /* Compute alignment needed.  We align all types to natural boundaries,
     with the exception of XFmode, which below is required to be 128-bit
     aligned (and XCmode 256-bit).  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }
  /* Classification of atomic types.  */
  switch (mode)
    {
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      return 1;
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case CTImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      classes[2] = classes[3] = X86_64_INTEGER_CLASS;
      return 4;
    case SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case TFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TCmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      classes[2] = X86_64_X87_CLASS;
      classes[3] = X86_64_X87UP_CLASS;
      return 4;
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
      return 0;
    default:
      abort ();
    }
}
/* Examine the argument and return the number of registers required in each
   class.  Return 0 if parameter should be passed in memory.  */
static int
examine_argument (mode, type, in_return, int_nregs, sse_nregs)
     enum machine_mode mode;
     tree type;
     int *int_nregs, *sse_nregs;
     int in_return;
{
  enum x86_64_reg_class class[MAX_CLASSES];
  int n = classify_argument (mode, type, class, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (class[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	if (!in_return)
	  return 0;
	break;
      case X86_64_MEMORY_CLASS:
	abort ();
      }
  return 1;
}
1683 /* Construct container for the argument used by GCC interface. See
1684 FUNCTION_ARG for the detailed description. */
1685 static rtx
1686 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1687 enum machine_mode mode;
1688 tree type;
1689 int in_return;
1690 int nintregs, nsseregs;
1691 int *intreg, sse_regno;
1693 enum machine_mode tmpmode;
1694 int bytes =
1695 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1696 enum x86_64_reg_class class[MAX_CLASSES];
1697 int n;
1698 int i;
1699 int nexps = 0;
1700 int needed_sseregs, needed_intregs;
1701 rtx exp[MAX_CLASSES];
1702 rtx ret;
1704 n = classify_argument (mode, type, class, 0);
1705 if (TARGET_DEBUG_ARG)
1707 if (!n)
1708 fprintf (stderr, "Memory class\n");
1709 else
1711 fprintf (stderr, "Classes:");
1712 for (i = 0; i < n; i++)
1714 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1716 fprintf (stderr, "\n");
1719 if (!n)
1720 return NULL;
1721 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1722 return NULL;
1723 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1724 return NULL;
1726 /* First construct simple cases. Avoid SCmode, since we want to use a
1727 single register to pass this type. */
1728 if (n == 1 && mode != SCmode)
1729 switch (class[0])
1731 case X86_64_INTEGER_CLASS:
1732 case X86_64_INTEGERSI_CLASS:
1733 return gen_rtx_REG (mode, intreg[0]);
1734 case X86_64_SSE_CLASS:
1735 case X86_64_SSESF_CLASS:
1736 case X86_64_SSEDF_CLASS:
1737 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1738 case X86_64_X87_CLASS:
1739 return gen_rtx_REG (mode, FIRST_STACK_REG);
1740 case X86_64_NO_CLASS:
1741 /* Zero sized array, struct or class. */
1742 return NULL;
1743 default:
1744 abort ();
1746 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1747 return gen_rtx_REG (TImode, SSE_REGNO (sse_regno));
1748 if (n == 2
1749 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1750 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1751 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1752 && class[1] == X86_64_INTEGER_CLASS
1753 && (mode == CDImode || mode == TImode)
1754 && intreg[0] + 1 == intreg[1])
1755 return gen_rtx_REG (mode, intreg[0]);
1756 if (n == 4
1757 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1758 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1759 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1761 /* Otherwise figure out the entries of the PARALLEL. */
1762 for (i = 0; i < n; i++)
1764 switch (class[i])
1766 case X86_64_NO_CLASS:
1767 break;
1768 case X86_64_INTEGER_CLASS:
1769 case X86_64_INTEGERSI_CLASS:
1770 /* Merge TImodes on aligned occasions here too. */
1771 if (i * 8 + 8 > bytes)
1772 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1773 else if (class[i] == X86_64_INTEGERSI_CLASS)
1774 tmpmode = SImode;
1775 else
1776 tmpmode = DImode;
1777 /* We've requested a size (e.g. 24 bits) that no integer mode matches. Use DImode. */
1778 if (tmpmode == BLKmode)
1779 tmpmode = DImode;
1780 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1781 gen_rtx_REG (tmpmode, *intreg),
1782 GEN_INT (i*8));
1783 intreg++;
1784 break;
1785 case X86_64_SSESF_CLASS:
1786 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1787 gen_rtx_REG (SFmode,
1788 SSE_REGNO (sse_regno)),
1789 GEN_INT (i*8));
1790 sse_regno++;
1791 break;
1792 case X86_64_SSEDF_CLASS:
1793 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1794 gen_rtx_REG (DFmode,
1795 SSE_REGNO (sse_regno)),
1796 GEN_INT (i*8));
1797 sse_regno++;
1798 break;
1799 case X86_64_SSE_CLASS:
1800 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
1801 tmpmode = TImode, i++;
1802 else
1803 tmpmode = DImode;
1804 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1805 gen_rtx_REG (tmpmode,
1806 SSE_REGNO (sse_regno)),
1807 GEN_INT (i*8));
1808 sse_regno++;
1809 break;
1810 default:
1811 abort ();
1814 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
1815 for (i = 0; i < nexps; i++)
1816 XVECEXP (ret, 0, i) = exp [i];
1817 return ret;
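/* Continuing the struct s example above, the PARALLEL built here has
   roughly the shape (register numbers illustrative):

       (parallel:BLK [(expr_list (reg:DF xmm0) (const_int 0))
                      (expr_list (reg:DI di)   (const_int 8))])

   i.e. bytes 0..7 travel in an SSE register and bytes 8..15 in an
   integer register.  */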
1820 /* Update the data in CUM to advance over an argument
1821 of mode MODE and data type TYPE.
1822 (TYPE is null for libcalls where that information may not be available.) */
1824 void
1825 function_arg_advance (cum, mode, type, named)
1826 CUMULATIVE_ARGS *cum; /* current arg information */
1827 enum machine_mode mode; /* current arg mode */
1828 tree type; /* type of the argument or 0 if lib support */
1829 int named; /* whether or not the argument was named */
1831 int bytes =
1832 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1833 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1835 if (TARGET_DEBUG_ARG)
1836 fprintf (stderr,
1837 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
1838 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1839 if (TARGET_64BIT)
1841 int int_nregs, sse_nregs;
1842 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
1843 cum->words += words;
1844 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
1846 cum->nregs -= int_nregs;
1847 cum->sse_nregs -= sse_nregs;
1848 cum->regno += int_nregs;
1849 cum->sse_regno += sse_nregs;
1851 else
1852 cum->words += words;
1854 else
1856 if (TARGET_SSE && mode == TImode)
1858 cum->sse_words += words;
1859 cum->sse_nregs -= 1;
1860 cum->sse_regno += 1;
1861 if (cum->sse_nregs <= 0)
1863 cum->sse_nregs = 0;
1864 cum->sse_regno = 0;
1867 else
1869 cum->words += words;
1870 cum->nregs -= words;
1871 cum->regno += words;
1873 if (cum->nregs <= 0)
1875 cum->nregs = 0;
1876 cum->regno = 0;
1880 return;
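/* Example of the size rounding above: on a 64-bit target
   (UNITS_PER_WORD == 8) a 12-byte BLKmode argument yields
   words = (12 + 8 - 1) / 8 == 2, so it consumes two registers or two
   stack words.  */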
1883 /* Define where to put the arguments to a function.
1884 Value is zero to push the argument on the stack,
1885 or a hard register in which to store the argument.
1887 MODE is the argument's machine mode.
1888 TYPE is the data type of the argument (as a tree).
1889 This is null for libcalls where that information may
1890 not be available.
1891 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1892 the preceding args and about the function being called.
1893 NAMED is nonzero if this argument is a named parameter
1894 (otherwise it is an extra parameter matching an ellipsis). */
1896 struct rtx_def *
1897 function_arg (cum, mode, type, named)
1898 CUMULATIVE_ARGS *cum; /* current arg information */
1899 enum machine_mode mode; /* current arg mode */
1900 tree type; /* type of the argument or 0 if lib support */
1901 int named; /* != 0 for normal args, == 0 for ... args */
1903 rtx ret = NULL_RTX;
1904 int bytes =
1905 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1906 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1908 /* Handle a hidden AL argument containing the number of registers for varargs
1909 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
1910 any AL settings. */
1911 if (mode == VOIDmode)
1913 if (TARGET_64BIT)
1914 return GEN_INT (cum->maybe_vaarg
1915 ? (cum->sse_nregs < 0
1916 ? SSE_REGPARM_MAX
1917 : cum->sse_regno)
1918 : -1);
1919 else
1920 return constm1_rtx;
1922 if (TARGET_64BIT)
1923 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
1924 &x86_64_int_parameter_registers [cum->regno],
1925 cum->sse_regno);
1926 else
1927 switch (mode)
1929 /* For now, pass fp/complex values on the stack. */
1930 default:
1931 break;
1933 case BLKmode:
1934 case DImode:
1935 case SImode:
1936 case HImode:
1937 case QImode:
1938 if (words <= cum->nregs)
1939 ret = gen_rtx_REG (mode, cum->regno);
1940 break;
1941 case TImode:
1942 if (cum->sse_nregs)
1943 ret = gen_rtx_REG (mode, cum->sse_regno);
1944 break;
1947 if (TARGET_DEBUG_ARG)
1949 fprintf (stderr,
1950 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
1951 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1953 if (ret)
1954 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
1955 else
1956 fprintf (stderr, ", stack");
1958 fprintf (stderr, " )\n");
1961 return ret;
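/* Example of the hidden AL argument handled above (x86-64 varargs
   call): for printf ("%f", 1.0) one SSE register carries an argument,
   so the caller loads al with 1 before the call; a varargs call with
   no floating point arguments loads al with 0.  */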
1964 /* Gives the alignment boundary, in bits, of an argument with the specified mode
1965 and type. */
1968 ix86_function_arg_boundary (mode, type)
1969 enum machine_mode mode;
1970 tree type;
1972 int align;
1973 if (!TARGET_64BIT)
1974 return PARM_BOUNDARY;
1975 if (type)
1976 align = TYPE_ALIGN (type);
1977 else
1978 align = GET_MODE_ALIGNMENT (mode);
1979 if (align < PARM_BOUNDARY)
1980 align = PARM_BOUNDARY;
1981 if (align > 128)
1982 align = 128;
1983 return align;
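/* Example (assuming the 64-bit PARM_BOUNDARY of 64): an int argument
   yields 64, a 16-byte type with TYPE_ALIGN == 128 yields 128, and any
   larger alignment is still clamped to 128.  */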
1986 /* Return true if N is a possible register number of function value. */
1987 bool
1988 ix86_function_value_regno_p (regno)
1989 int regno;
1991 if (!TARGET_64BIT)
1993 return ((regno) == 0
1994 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
1995 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
1997 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
1998 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
1999 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2002 /* Define how to find the value returned by a function.
2003 VALTYPE is the data type of the value (as a tree).
2004 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2005 otherwise, FUNC is 0. */
2007 ix86_function_value (valtype)
2008 tree valtype;
2010 if (TARGET_64BIT)
2012 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2013 REGPARM_MAX, SSE_REGPARM_MAX,
2014 x86_64_int_return_registers, 0);
2015 /* For zero-sized structures, construct_container returns NULL, but we need
2016 to keep the rest of the compiler happy by returning a meaningful value. */
2017 if (!ret)
2018 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2019 return ret;
2021 else
2022 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2025 /* Return nonzero if TYPE is returned in memory. */
2027 ix86_return_in_memory (type)
2028 tree type;
2030 int needed_intregs, needed_sseregs;
2031 if (TARGET_64BIT)
2033 return !examine_argument (TYPE_MODE (type), type, 1,
2034 &needed_intregs, &needed_sseregs);
2036 else
2038 if (TYPE_MODE (type) == BLKmode
2039 || (VECTOR_MODE_P (TYPE_MODE (type))
2040 && int_size_in_bytes (type) == 8)
2041 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2042 && TYPE_MODE (type) != TFmode
2043 && !VECTOR_MODE_P (TYPE_MODE (type))))
2044 return 1;
2045 return 0;
2049 /* Define how to find the value returned by a library function
2050 assuming the value has mode MODE. */
2052 ix86_libcall_value (mode)
2053 enum machine_mode mode;
2055 if (TARGET_64BIT)
2057 switch (mode)
2059 case SFmode:
2060 case SCmode:
2061 case DFmode:
2062 case DCmode:
2063 return gen_rtx_REG (mode, FIRST_SSE_REG);
2064 case TFmode:
2065 case TCmode:
2066 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2067 default:
2068 return gen_rtx_REG (mode, 0);
2071 else
2072 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2075 /* Create the va_list data type. */
2077 tree
2078 ix86_build_va_list ()
2080 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2082 /* For i386 we use plain pointer to argument area. */
2083 if (!TARGET_64BIT)
2084 return build_pointer_type (char_type_node);
2086 record = make_lang_type (RECORD_TYPE);
2087 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2089 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2090 unsigned_type_node);
2091 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2092 unsigned_type_node);
2093 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2094 ptr_type_node);
2095 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2096 ptr_type_node);
2098 DECL_FIELD_CONTEXT (f_gpr) = record;
2099 DECL_FIELD_CONTEXT (f_fpr) = record;
2100 DECL_FIELD_CONTEXT (f_ovf) = record;
2101 DECL_FIELD_CONTEXT (f_sav) = record;
2103 TREE_CHAIN (record) = type_decl;
2104 TYPE_NAME (record) = type_decl;
2105 TYPE_FIELDS (record) = f_gpr;
2106 TREE_CHAIN (f_gpr) = f_fpr;
2107 TREE_CHAIN (f_fpr) = f_ovf;
2108 TREE_CHAIN (f_ovf) = f_sav;
2110 layout_type (record);
2112 /* The correct type is an array type of one element. */
2113 return build_array_type (record, build_index_type (size_zero_node));
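/* The record built above is layout-compatible with this user-level
   sketch of the x86-64 va_list (illustrative only, not compiler code):  */

typedef struct {
  unsigned int gp_offset;	/* byte offset into reg_save_area, next GP reg */
  unsigned int fp_offset;	/* byte offset into reg_save_area, next SSE reg */
  void *overflow_arg_area;	/* next stack-passed argument */
  void *reg_save_area;		/* start of the register save area */
} example_va_list[1];		/* an array type of one element, as above */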
2116 /* Perform any actions needed for a function that is receiving a
2117 variable number of arguments.
2119 CUM is as above.
2121 MODE and TYPE are the mode and type of the current parameter.
2123 PRETEND_SIZE is a variable that should be set to the amount of stack
2124 that must be pushed by the prolog to pretend that our caller pushed it.
2127 Normally, this macro will push all remaining incoming registers on the
2128 stack and set PRETEND_SIZE to the length of the registers pushed. */
2130 void
2131 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2132 CUMULATIVE_ARGS *cum;
2133 enum machine_mode mode;
2134 tree type;
2135 int *pretend_size ATTRIBUTE_UNUSED;
2136 int no_rtl;
2139 CUMULATIVE_ARGS next_cum;
2140 rtx save_area = NULL_RTX, mem;
2141 rtx label;
2142 rtx label_ref;
2143 rtx tmp_reg;
2144 rtx nsse_reg;
2145 int set;
2146 tree fntype;
2147 int stdarg_p;
2148 int i;
2150 if (!TARGET_64BIT)
2151 return;
2153 /* Indicate that we need to allocate space on the stack for the varargs save area. */
2154 ix86_save_varrargs_registers = 1;
2156 fntype = TREE_TYPE (current_function_decl);
2157 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2158 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2159 != void_type_node));
2161 /* For varargs, we do not want to skip the dummy va_dcl argument.
2162 For stdargs, we do want to skip the last named argument. */
2163 next_cum = *cum;
2164 if (stdarg_p)
2165 function_arg_advance (&next_cum, mode, type, 1);
2167 if (!no_rtl)
2168 save_area = frame_pointer_rtx;
2170 set = get_varargs_alias_set ();
2172 for (i = next_cum.regno; i < ix86_regparm; i++)
2174 mem = gen_rtx_MEM (Pmode,
2175 plus_constant (save_area, i * UNITS_PER_WORD));
2176 MEM_ALIAS_SET (mem) = set;
2177 emit_move_insn (mem, gen_rtx_REG (Pmode,
2178 x86_64_int_parameter_registers[i]));
2181 if (next_cum.sse_nregs)
2183 /* Now emit code to save SSE registers. The AX parameter contains the number
2184 of SSE parameter registers used to call this function. We use the
2185 sse_prologue_save insn template that produces a computed jump across
2186 the SSE saves. We need some preparation work to get this working. */
2188 label = gen_label_rtx ();
2189 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2191 /* Compute the address to jump to:
2192 label - eax*4 + nnamed_sse_arguments*4 */
2193 tmp_reg = gen_reg_rtx (Pmode);
2194 nsse_reg = gen_reg_rtx (Pmode);
2195 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2196 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2197 gen_rtx_MULT (VOIDmode, nsse_reg,
2198 GEN_INT (4))));
2199 if (next_cum.sse_regno)
2200 emit_move_insn
2201 (nsse_reg,
2202 gen_rtx_CONST (DImode,
2203 gen_rtx_PLUS (DImode,
2204 label_ref,
2205 GEN_INT (next_cum.sse_regno * 4))));
2206 else
2207 emit_move_insn (nsse_reg, label_ref);
2208 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2210 /* Compute the address of the memory block we save into. We always use a
2211 pointer pointing 127 bytes after the first byte to store - this is needed
2212 to keep the instruction size limited to 4 bytes. */
2213 tmp_reg = gen_reg_rtx (Pmode);
2214 emit_insn (gen_rtx_SET(VOIDmode, tmp_reg,
2215 plus_constant (save_area, 8 * REGPARM_MAX + 127)));
2216 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2217 MEM_ALIAS_SET (mem) = set;
2219 /* And finally do the dirty job! */
2220 emit_insn (gen_sse_prologue_save (mem, nsse_reg, GEN_INT (next_cum.sse_regno),
2221 label));
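/* Resulting register save area layout (assuming the x86-64 values
   REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8):

       bytes   0 ..  47   six integer registers, 8 bytes each
       bytes  48 .. 175   eight SSE registers, 16 bytes each

   This matches the gp_offset/fp_offset arithmetic in ix86_va_start
   below.  */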
2226 /* Implement va_start. */
2228 void
2229 ix86_va_start (stdarg_p, valist, nextarg)
2230 int stdarg_p;
2231 tree valist;
2232 rtx nextarg;
2234 HOST_WIDE_INT words, n_gpr, n_fpr;
2235 tree f_gpr, f_fpr, f_ovf, f_sav;
2236 tree gpr, fpr, ovf, sav, t;
2238 /* Only 64bit target needs something special. */
2239 if (!TARGET_64BIT)
2241 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2242 return;
2245 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2246 f_fpr = TREE_CHAIN (f_gpr);
2247 f_ovf = TREE_CHAIN (f_fpr);
2248 f_sav = TREE_CHAIN (f_ovf);
2250 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2251 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2252 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2253 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2254 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2256 /* Count number of gp and fp argument registers used. */
2257 words = current_function_args_info.words;
2258 n_gpr = current_function_args_info.regno;
2259 n_fpr = current_function_args_info.sse_regno;
2261 if (TARGET_DEBUG_ARG)
2262 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2263 words, n_gpr, n_fpr);
2265 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2266 build_int_2 (n_gpr * 8, 0));
2267 TREE_SIDE_EFFECTS (t) = 1;
2268 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2270 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2271 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2272 TREE_SIDE_EFFECTS (t) = 1;
2273 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2275 /* Find the overflow area. */
2276 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2277 if (words != 0)
2278 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2279 build_int_2 (words * UNITS_PER_WORD, 0));
2280 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2281 TREE_SIDE_EFFECTS (t) = 1;
2282 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2284 /* Find the register save area.
2285 The prologue of the function saves it right above the stack frame. */
2286 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2287 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2288 TREE_SIDE_EFFECTS (t) = 1;
2289 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
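/* Worked example for the stores above: given

       void f (int a, double b, ...);

   one GP and one SSE register are consumed by the named arguments, so
   va_start records gp_offset = 1 * 8 == 8 and, with REGPARM_MAX == 6,
   fp_offset = 1 * 16 + 8 * 6 == 64.  */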
2292 /* Implement va_arg. */
2294 ix86_va_arg (valist, type)
2295 tree valist, type;
2297 static int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2298 tree f_gpr, f_fpr, f_ovf, f_sav;
2299 tree gpr, fpr, ovf, sav, t;
2300 int indirect_p = 0, size, rsize;
2301 rtx lab_false, lab_over = NULL_RTX;
2302 rtx addr_rtx, r;
2303 rtx container;
2305 /* Only 64bit target needs something special. */
2306 if (!TARGET_64BIT)
2308 return std_expand_builtin_va_arg (valist, type);
2311 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2312 f_fpr = TREE_CHAIN (f_gpr);
2313 f_ovf = TREE_CHAIN (f_fpr);
2314 f_sav = TREE_CHAIN (f_ovf);
2316 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2317 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2318 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2319 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2320 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2322 size = int_size_in_bytes (type);
2323 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2325 container = construct_container (TYPE_MODE (type), type, 0,
2326 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2328 /* Pull the value out of the saved registers ... */
2331 addr_rtx = gen_reg_rtx (Pmode);
2333 if (container)
2335 rtx int_addr_rtx, sse_addr_rtx;
2336 int needed_intregs, needed_sseregs;
2337 int need_temp;
2339 lab_over = gen_label_rtx ();
2340 lab_false = gen_label_rtx ();
2342 examine_argument (TYPE_MODE (type), type, 0,
2343 &needed_intregs, &needed_sseregs);
2346 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2347 || TYPE_ALIGN (type) > 128);
2349 /* In case we are passing a structure, verify that it is a consecutive block
2350 on the register save area. If not, we need to do moves. */
2351 if (!need_temp && !REG_P (container))
2353 /* Verify that all registers are strictly consecutive */
2354 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2356 int i;
2358 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2360 rtx slot = XVECEXP (container, 0, i);
2361 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int)i
2362 || INTVAL (XEXP (slot, 1)) != i * 16)
2363 need_temp = 1;
2366 else
2368 int i;
2370 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2372 rtx slot = XVECEXP (container, 0, i);
2373 if (REGNO (XEXP (slot, 0)) != (unsigned int)i
2374 || INTVAL (XEXP (slot, 1)) != i * 8)
2375 need_temp = 1;
2379 if (!need_temp)
2381 int_addr_rtx = addr_rtx;
2382 sse_addr_rtx = addr_rtx;
2384 else
2386 int_addr_rtx = gen_reg_rtx (Pmode);
2387 sse_addr_rtx = gen_reg_rtx (Pmode);
2389 /* First ensure that we fit completely in registers. */
2390 if (needed_intregs)
2392 emit_cmp_and_jump_insns (expand_expr
2393 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2394 GEN_INT ((REGPARM_MAX - needed_intregs +
2395 1) * 8), GE, const1_rtx, SImode,
2396 1, 1, lab_false);
2398 if (needed_sseregs)
2400 emit_cmp_and_jump_insns (expand_expr
2401 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2402 GEN_INT ((SSE_REGPARM_MAX -
2403 needed_sseregs + 1) * 16 +
2404 REGPARM_MAX * 8), GE, const1_rtx,
2405 SImode, 1, 1, lab_false);
2408 /* Compute index to start of area used for integer regs. */
2409 if (needed_intregs)
2411 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2412 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2413 if (r != int_addr_rtx)
2414 emit_move_insn (int_addr_rtx, r);
2416 if (needed_sseregs)
2418 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2419 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2420 if (r != sse_addr_rtx)
2421 emit_move_insn (sse_addr_rtx, r);
2423 if (need_temp)
2425 int i;
2426 rtx mem;
2428 mem = assign_temp (type, 0, 1, 0);
2429 MEM_ALIAS_SET (mem) = get_varargs_alias_set ();
2430 addr_rtx = XEXP (mem, 0);
2431 for (i = 0; i < XVECLEN (container, 0); i++)
2433 rtx slot = XVECEXP (container, 0, i);
2434 rtx reg = XEXP (slot, 0);
2435 enum machine_mode mode = GET_MODE (reg);
2436 rtx src_addr;
2437 rtx src_mem;
2438 int src_offset;
2439 rtx dest_mem;
2441 if (SSE_REGNO_P (REGNO (reg)))
2443 src_addr = sse_addr_rtx;
2444 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2446 else
2448 src_addr = int_addr_rtx;
2449 src_offset = REGNO (reg) * 8;
2451 src_mem = gen_rtx_MEM (mode, src_addr);
2452 MEM_ALIAS_SET (src_mem) = get_varargs_alias_set ();
2453 src_mem = adjust_address (src_mem, mode, src_offset);
2454 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2455 PUT_MODE (dest_mem, mode);
2456 /* ??? Break out TImode moves from integer registers? */
2457 emit_move_insn (dest_mem, src_mem);
2461 if (needed_intregs)
2464 t = build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2465 build_int_2 (needed_intregs * 8, 0));
2466 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2467 TREE_SIDE_EFFECTS (t) = 1;
2468 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2470 if (needed_sseregs)
2473 t = build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2474 build_int_2 (needed_sseregs * 16, 0));
2475 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2476 TREE_SIDE_EFFECTS (t) = 1;
2477 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2480 emit_jump_insn (gen_jump (lab_over));
2481 emit_barrier ();
2482 emit_label (lab_false);
2485 /* ... otherwise out of the overflow area. */
2487 /* Care for on-stack alignment if needed. */
2488 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2489 t = ovf;
2490 else
2492 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2493 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2494 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2496 t = save_expr (t);
2498 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2499 if (r != addr_rtx)
2500 emit_move_insn (addr_rtx, r);
2503 t = build (PLUS_EXPR, TREE_TYPE (t), t,
2504 build_int_2 (rsize * UNITS_PER_WORD, 0));
2505 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2506 TREE_SIDE_EFFECTS (t) = 1;
2507 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2509 if (container)
2510 emit_label (lab_over);
2512 if (indirect_p)
2514 abort ();
2515 r = gen_rtx_MEM (Pmode, addr_rtx);
2516 MEM_ALIAS_SET (r) = get_varargs_alias_set ();
2517 emit_move_insn (addr_rtx, r);
2520 return addr_rtx;
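/* The overflow-area alignment above is the standard round-up idiom.
   A standalone sketch (assumes ALIGN is a power of two):  */

static unsigned long
example_round_up (p, align)
     unsigned long p, align;
{
  /* e.g. example_round_up (13, 8) == 16.  */
  return (p + align - 1) & -align;
}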
2523 /* Return nonzero if OP is general operand representable on x86_64. */
2526 x86_64_general_operand (op, mode)
2527 rtx op;
2528 enum machine_mode mode;
2530 if (!TARGET_64BIT)
2531 return general_operand (op, mode);
2532 if (nonimmediate_operand (op, mode))
2533 return 1;
2534 return x86_64_sign_extended_value (op);
2537 /* Return nonzero if OP is a general operand representable on x86_64
2538 as either a sign extended or zero extended constant. */
2541 x86_64_szext_general_operand (op, mode)
2542 rtx op;
2543 enum machine_mode mode;
2545 if (!TARGET_64BIT)
2546 return general_operand (op, mode);
2547 if (nonimmediate_operand (op, mode))
2548 return 1;
2549 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2552 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2555 x86_64_nonmemory_operand (op, mode)
2556 rtx op;
2557 enum machine_mode mode;
2559 if (!TARGET_64BIT)
2560 return nonmemory_operand (op, mode);
2561 if (register_operand (op, mode))
2562 return 1;
2563 return x86_64_sign_extended_value (op);
2566 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2569 x86_64_movabs_operand (op, mode)
2570 rtx op;
2571 enum machine_mode mode;
2573 if (!TARGET_64BIT || !flag_pic)
2574 return nonmemory_operand (op, mode);
2575 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2576 return 1;
2577 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2578 return 1;
2579 return 0;
2582 /* Return nonzero if OP is a nonmemory operand representable on x86_64
2583 as either a sign extended or zero extended constant. */
2585 x86_64_szext_nonmemory_operand (op, mode)
2586 rtx op;
2587 enum machine_mode mode;
2589 if (!TARGET_64BIT)
2590 return nonmemory_operand (op, mode);
2591 if (register_operand (op, mode))
2592 return 1;
2593 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2596 /* Return nonzero if OP is immediate operand representable on x86_64. */
2599 x86_64_immediate_operand (op, mode)
2600 rtx op;
2601 enum machine_mode mode;
2603 if (!TARGET_64BIT)
2604 return immediate_operand (op, mode);
2605 return x86_64_sign_extended_value (op);
2608 /* Return nonzero if OP is an immediate operand representable on x86_64
2609 as a zero extended value. */
2611 x86_64_zext_immediate_operand (op, mode)
2612 rtx op;
2613 enum machine_mode mode ATTRIBUTE_UNUSED;
2615 return x86_64_zero_extended_value (op);
2618 /* Return nonzero if OP is (const_int 1), else return zero. */
2621 const_int_1_operand (op, mode)
2622 rtx op;
2623 enum machine_mode mode ATTRIBUTE_UNUSED;
2625 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2628 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2629 reference and a constant. */
2632 symbolic_operand (op, mode)
2633 register rtx op;
2634 enum machine_mode mode ATTRIBUTE_UNUSED;
2636 switch (GET_CODE (op))
2638 case SYMBOL_REF:
2639 case LABEL_REF:
2640 return 1;
2642 case CONST:
2643 op = XEXP (op, 0);
2644 if (GET_CODE (op) == SYMBOL_REF
2645 || GET_CODE (op) == LABEL_REF
2646 || (GET_CODE (op) == UNSPEC
2647 && (XINT (op, 1) == 6
2648 || XINT (op, 1) == 7
2649 || XINT (op, 1) == 15)))
2650 return 1;
2651 if (GET_CODE (op) != PLUS
2652 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2653 return 0;
2655 op = XEXP (op, 0);
2656 if (GET_CODE (op) == SYMBOL_REF
2657 || GET_CODE (op) == LABEL_REF)
2658 return 1;
2659 /* Only @GOTOFF gets offsets. */
2660 if (GET_CODE (op) != UNSPEC
2661 || XINT (op, 1) != 7)
2662 return 0;
2664 op = XVECEXP (op, 0, 0);
2665 if (GET_CODE (op) == SYMBOL_REF
2666 || GET_CODE (op) == LABEL_REF)
2667 return 1;
2668 return 0;
2670 default:
2671 return 0;
2675 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2678 pic_symbolic_operand (op, mode)
2679 register rtx op;
2680 enum machine_mode mode ATTRIBUTE_UNUSED;
2682 if (GET_CODE (op) != CONST)
2683 return 0;
2684 op = XEXP (op, 0);
2685 if (TARGET_64BIT)
2687 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2688 return 1;
2690 else
2692 if (GET_CODE (op) == UNSPEC)
2693 return 1;
2694 if (GET_CODE (op) != PLUS
2695 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2696 return 0;
2697 op = XEXP (op, 0);
2698 if (GET_CODE (op) == UNSPEC)
2699 return 1;
2701 return 0;
2704 /* Return true if OP is a symbolic operand that resolves locally. */
2706 static int
2707 local_symbolic_operand (op, mode)
2708 rtx op;
2709 enum machine_mode mode ATTRIBUTE_UNUSED;
2711 if (GET_CODE (op) == LABEL_REF)
2712 return 1;
2714 if (GET_CODE (op) == CONST
2715 && GET_CODE (XEXP (op, 0)) == PLUS
2716 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2717 op = XEXP (XEXP (op, 0), 0);
2719 if (GET_CODE (op) != SYMBOL_REF)
2720 return 0;
2722 /* These we've been told are local by varasm and encode_section_info
2723 respectively. */
2724 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2725 return 1;
2727 /* There is, however, a not insubstantial body of code in the rest of
2728 the compiler that assumes it can just stick the results of
2729 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2730 /* ??? This is a hack. Should update the body of the compiler to
2731 always create a DECL and invoke ENCODE_SECTION_INFO. */
2732 if (strncmp (XSTR (op, 0), internal_label_prefix,
2733 internal_label_prefix_len) == 0)
2734 return 1;
2736 return 0;
2739 /* Test for a valid operand for a call instruction. Don't allow the
2740 arg pointer register or virtual regs since they may decay into
2741 reg + const, which the patterns can't handle. */
2744 call_insn_operand (op, mode)
2745 rtx op;
2746 enum machine_mode mode ATTRIBUTE_UNUSED;
2748 /* Disallow indirect through a virtual register. This leads to
2749 compiler aborts when trying to eliminate them. */
2750 if (GET_CODE (op) == REG
2751 && (op == arg_pointer_rtx
2752 || op == frame_pointer_rtx
2753 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
2754 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
2755 return 0;
2757 /* Disallow `call 1234'. Due to varying assembler lameness this
2758 gets either rejected or translated to `call .+1234'. */
2759 if (GET_CODE (op) == CONST_INT)
2760 return 0;
2762 /* Explicitly allow SYMBOL_REF even if pic. */
2763 if (GET_CODE (op) == SYMBOL_REF)
2764 return 1;
2766 /* Half-pic doesn't allow anything but registers and constants.
2767 We've just taken care of the latter. */
2768 if (HALF_PIC_P ())
2769 return register_operand (op, Pmode);
2771 /* Otherwise we can allow any general_operand in the address. */
2772 return general_operand (op, Pmode);
2776 constant_call_address_operand (op, mode)
2777 rtx op;
2778 enum machine_mode mode ATTRIBUTE_UNUSED;
2780 if (GET_CODE (op) == CONST
2781 && GET_CODE (XEXP (op, 0)) == PLUS
2782 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2783 op = XEXP (XEXP (op, 0), 0);
2784 return GET_CODE (op) == SYMBOL_REF;
2787 /* Match exactly zero and one. */
2790 const0_operand (op, mode)
2791 register rtx op;
2792 enum machine_mode mode;
2794 return op == CONST0_RTX (mode);
2798 const1_operand (op, mode)
2799 register rtx op;
2800 enum machine_mode mode ATTRIBUTE_UNUSED;
2802 return op == const1_rtx;
2805 /* Match 2, 4, or 8. Used for leal multiplicands. */
2808 const248_operand (op, mode)
2809 register rtx op;
2810 enum machine_mode mode ATTRIBUTE_UNUSED;
2812 return (GET_CODE (op) == CONST_INT
2813 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
2816 /* True if this is a constant appropriate for an increment or decrement. */
2819 incdec_operand (op, mode)
2820 register rtx op;
2821 enum machine_mode mode ATTRIBUTE_UNUSED;
2823 /* On Pentium4, the inc and dec operations cause an extra dependency on the
2824 flags register, since the carry flag is not set. */
2825 if (TARGET_PENTIUM4 && !optimize_size)
2826 return 0;
2827 return op == const1_rtx || op == constm1_rtx;
2830 /* Return nonzero if OP is acceptable as operand of DImode shift
2831 expander. */
2834 shiftdi_operand (op, mode)
2835 rtx op;
2836 enum machine_mode mode ATTRIBUTE_UNUSED;
2838 if (TARGET_64BIT)
2839 return nonimmediate_operand (op, mode);
2840 else
2841 return register_operand (op, mode);
2844 /* Return false if this is the stack pointer, or any other fake
2845 register eliminable to the stack pointer. Otherwise, this is
2846 a register operand.
2848 This is used to prevent esp from being used as an index reg.
2849 Which would only happen in pathological cases. */
2852 reg_no_sp_operand (op, mode)
2853 register rtx op;
2854 enum machine_mode mode;
2856 rtx t = op;
2857 if (GET_CODE (t) == SUBREG)
2858 t = SUBREG_REG (t);
2859 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
2860 return 0;
2862 return register_operand (op, mode);
2866 mmx_reg_operand (op, mode)
2867 register rtx op;
2868 enum machine_mode mode ATTRIBUTE_UNUSED;
2870 return MMX_REG_P (op);
2873 /* Return false if this is any eliminable register. Otherwise
2874 general_operand. */
2877 general_no_elim_operand (op, mode)
2878 register rtx op;
2879 enum machine_mode mode;
2881 rtx t = op;
2882 if (GET_CODE (t) == SUBREG)
2883 t = SUBREG_REG (t);
2884 if (t == arg_pointer_rtx || t == frame_pointer_rtx
2885 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
2886 || t == virtual_stack_dynamic_rtx)
2887 return 0;
2888 if (REG_P (t)
2889 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
2890 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
2891 return 0;
2893 return general_operand (op, mode);
2896 /* Return false if this is any eliminable register. Otherwise
2897 register_operand or const_int. */
2900 nonmemory_no_elim_operand (op, mode)
2901 register rtx op;
2902 enum machine_mode mode;
2904 rtx t = op;
2905 if (GET_CODE (t) == SUBREG)
2906 t = SUBREG_REG (t);
2907 if (t == arg_pointer_rtx || t == frame_pointer_rtx
2908 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
2909 || t == virtual_stack_dynamic_rtx)
2910 return 0;
2912 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
2915 /* Return true if op is a Q_REGS class register. */
2918 q_regs_operand (op, mode)
2919 register rtx op;
2920 enum machine_mode mode;
2922 if (mode != VOIDmode && GET_MODE (op) != mode)
2923 return 0;
2924 if (GET_CODE (op) == SUBREG)
2925 op = SUBREG_REG (op);
2926 return QI_REG_P (op);
2929 /* Return true if op is a NON_Q_REGS class register. */
2932 non_q_regs_operand (op, mode)
2933 register rtx op;
2934 enum machine_mode mode;
2936 if (mode != VOIDmode && GET_MODE (op) != mode)
2937 return 0;
2938 if (GET_CODE (op) == SUBREG)
2939 op = SUBREG_REG (op);
2940 return NON_QI_REG_P (op);
2943 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
2944 insns. */
2946 sse_comparison_operator (op, mode)
2947 rtx op;
2948 enum machine_mode mode ATTRIBUTE_UNUSED;
2950 enum rtx_code code = GET_CODE (op);
2951 switch (code)
2953 /* Operations supported directly. */
2954 case EQ:
2955 case LT:
2956 case LE:
2957 case UNORDERED:
2958 case NE:
2959 case UNGE:
2960 case UNGT:
2961 case ORDERED:
2962 return 1;
2963 /* These are equivalent to ones above in non-IEEE comparisons. */
2964 case UNEQ:
2965 case UNLT:
2966 case UNLE:
2967 case LTGT:
2968 case GE:
2969 case GT:
2970 return !TARGET_IEEE_FP;
2971 default:
2972 return 0;
2975 /* Return 1 if OP is a valid comparison operator in valid mode. */
2977 ix86_comparison_operator (op, mode)
2978 register rtx op;
2979 enum machine_mode mode;
2981 enum machine_mode inmode;
2982 enum rtx_code code = GET_CODE (op);
2983 if (mode != VOIDmode && GET_MODE (op) != mode)
2984 return 0;
2985 if (GET_RTX_CLASS (code) != '<')
2986 return 0;
2987 inmode = GET_MODE (XEXP (op, 0));
2989 if (inmode == CCFPmode || inmode == CCFPUmode)
2991 enum rtx_code second_code, bypass_code;
2992 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
2993 return (bypass_code == NIL && second_code == NIL);
2995 switch (code)
2997 case EQ: case NE:
2998 return 1;
2999 case LT: case GE:
3000 if (inmode == CCmode || inmode == CCGCmode
3001 || inmode == CCGOCmode || inmode == CCNOmode)
3002 return 1;
3003 return 0;
3004 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3005 if (inmode == CCmode)
3006 return 1;
3007 return 0;
3008 case GT: case LE:
3009 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3010 return 1;
3011 return 0;
3012 default:
3013 return 0;
3017 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3020 fcmov_comparison_operator (op, mode)
3021 register rtx op;
3022 enum machine_mode mode;
3024 enum machine_mode inmode;
3025 enum rtx_code code = GET_CODE (op);
3026 if (mode != VOIDmode && GET_MODE (op) != mode)
3027 return 0;
3028 if (GET_RTX_CLASS (code) != '<')
3029 return 0;
3030 inmode = GET_MODE (XEXP (op, 0));
3031 if (inmode == CCFPmode || inmode == CCFPUmode)
3033 enum rtx_code second_code, bypass_code;
3034 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3035 if (bypass_code != NIL || second_code != NIL)
3036 return 0;
3037 code = ix86_fp_compare_code_to_integer (code);
3039 /* The i387 supports just a limited set of condition codes. */
3040 switch (code)
3042 case LTU: case GTU: case LEU: case GEU:
3043 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3044 return 1;
3045 return 0;
3046 case ORDERED: case UNORDERED:
3047 case EQ: case NE:
3048 return 1;
3049 default:
3050 return 0;
3054 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3057 promotable_binary_operator (op, mode)
3058 register rtx op;
3059 enum machine_mode mode ATTRIBUTE_UNUSED;
3061 switch (GET_CODE (op))
3063 case MULT:
3064 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3065 but the 386 and 486 do HImode multiplies faster. */
3066 return ix86_cpu > PROCESSOR_I486;
3067 case PLUS:
3068 case AND:
3069 case IOR:
3070 case XOR:
3071 case ASHIFT:
3072 return 1;
3073 default:
3074 return 0;
3078 /* Nearly general operand, but accept any const_double, since we wish
3079 to be able to drop them into memory rather than have them get pulled
3080 into registers. */
3083 cmp_fp_expander_operand (op, mode)
3084 register rtx op;
3085 enum machine_mode mode;
3087 if (mode != VOIDmode && mode != GET_MODE (op))
3088 return 0;
3089 if (GET_CODE (op) == CONST_DOUBLE)
3090 return 1;
3091 return general_operand (op, mode);
3094 /* Match an SI or HImode register for a zero_extract. */
3097 ext_register_operand (op, mode)
3098 register rtx op;
3099 enum machine_mode mode ATTRIBUTE_UNUSED;
3101 int regno;
3102 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3103 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3104 return 0;
3106 if (!register_operand (op, VOIDmode))
3107 return 0;
3109 /* Be careful to accept only registers having upper parts. */
3110 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3111 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3114 /* Return 1 if this is a valid binary floating-point operation.
3115 OP is the expression matched, and MODE is its mode. */
3118 binary_fp_operator (op, mode)
3119 register rtx op;
3120 enum machine_mode mode;
3122 if (mode != VOIDmode && mode != GET_MODE (op))
3123 return 0;
3125 switch (GET_CODE (op))
3127 case PLUS:
3128 case MINUS:
3129 case MULT:
3130 case DIV:
3131 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3133 default:
3134 return 0;
3139 mult_operator(op, mode)
3140 register rtx op;
3141 enum machine_mode mode ATTRIBUTE_UNUSED;
3143 return GET_CODE (op) == MULT;
3147 div_operator(op, mode)
3148 register rtx op;
3149 enum machine_mode mode ATTRIBUTE_UNUSED;
3151 return GET_CODE (op) == DIV;
3155 arith_or_logical_operator (op, mode)
3156 rtx op;
3157 enum machine_mode mode;
3159 return ((mode == VOIDmode || GET_MODE (op) == mode)
3160 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3161 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3164 /* Returns 1 if OP is memory operand with a displacement. */
3167 memory_displacement_operand (op, mode)
3168 register rtx op;
3169 enum machine_mode mode;
3171 struct ix86_address parts;
3173 if (! memory_operand (op, mode))
3174 return 0;
3176 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3177 abort ();
3179 return parts.disp != NULL_RTX;
3182 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3183 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3185 ??? It seems likely that this will only work because cmpsi is an
3186 expander, and no actual insns use this. */
3189 cmpsi_operand (op, mode)
3190 rtx op;
3191 enum machine_mode mode;
3193 if (nonimmediate_operand (op, mode))
3194 return 1;
3196 if (GET_CODE (op) == AND
3197 && GET_MODE (op) == SImode
3198 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3199 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3200 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3201 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3202 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3203 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3204 return 1;
3206 return 0;
3209 /* Returns 1 if OP is a memory operand that cannot be represented by the
3210 modRM array. */
3213 long_memory_operand (op, mode)
3214 register rtx op;
3215 enum machine_mode mode;
3217 if (! memory_operand (op, mode))
3218 return 0;
3220 return memory_address_length (op) != 0;
3223 /* Return nonzero if the rtx is known aligned. */
3226 aligned_operand (op, mode)
3227 rtx op;
3228 enum machine_mode mode;
3230 struct ix86_address parts;
3232 if (!general_operand (op, mode))
3233 return 0;
3235 /* Registers and immediate operands are always "aligned". */
3236 if (GET_CODE (op) != MEM)
3237 return 1;
3239 /* Don't even try to do any aligned optimizations with volatiles. */
3240 if (MEM_VOLATILE_P (op))
3241 return 0;
3243 op = XEXP (op, 0);
3245 /* Pushes and pops are only valid on the stack pointer. */
3246 if (GET_CODE (op) == PRE_DEC
3247 || GET_CODE (op) == POST_INC)
3248 return 1;
3250 /* Decode the address. */
3251 if (! ix86_decompose_address (op, &parts))
3252 abort ();
3254 /* Look for some component that isn't known to be aligned. */
3255 if (parts.index)
3257 if (parts.scale < 4
3258 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3259 return 0;
3261 if (parts.base)
3263 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3264 return 0;
3266 if (parts.disp)
3268 if (GET_CODE (parts.disp) != CONST_INT
3269 || (INTVAL (parts.disp) & 3) != 0)
3270 return 0;
3273 /* Didn't find one -- this must be an aligned address. */
3274 return 1;
3277 /* Return true if the constant is something that can be loaded with
3278 a special instruction. Only handle 0.0 and 1.0; others are less
3279 worthwhile. */
3282 standard_80387_constant_p (x)
3283 rtx x;
3285 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3286 return -1;
3287 /* Note that the 80387 has other constants, such as pi, that we should
3288 support too. On some machines, these are much slower to load as a standard
3289 constant than to load from doubles in memory. */
3290 if (x == CONST0_RTX (GET_MODE (x)))
3291 return 1;
3292 if (x == CONST1_RTX (GET_MODE (x)))
3293 return 2;
3294 return 0;
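/* The return values above distinguish the two constants the i387 can
   materialize directly: 1 selects 0.0 and 2 selects 1.0 (presumably
   emitted as fldz and fld1 by the move patterns in i386.md), while -1
   and 0 mean no such constant.  */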
3297 /* Return 1 if X is an FP constant we can load to an SSE register w/o using memory. */
3300 standard_sse_constant_p (x)
3301 rtx x;
3303 if (GET_CODE (x) != CONST_DOUBLE)
3304 return -1;
3305 return (x == CONST0_RTX (GET_MODE (x)));
3308 /* Returns 1 if OP contains a symbol reference */
3311 symbolic_reference_mentioned_p (op)
3312 rtx op;
3314 register const char *fmt;
3315 register int i;
3317 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3318 return 1;
3320 fmt = GET_RTX_FORMAT (GET_CODE (op));
3321 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3323 if (fmt[i] == 'E')
3325 register int j;
3327 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3328 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3329 return 1;
3332 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3333 return 1;
3336 return 0;
3339 /* Return 1 if it is appropriate to emit `ret' instructions in the
3340 body of a function. Do this only if the epilogue is simple, needing a
3341 couple of insns. Prior to reloading, we can't tell how many registers
3342 must be saved, so return 0 then. Return 0 if there is no frame
3343 marker to de-allocate.
3345 If NON_SAVING_SETJMP is defined and true, then it is not possible
3346 for the epilogue to be simple, so return 0. This is a special case
3347 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3348 until final, but jump_optimize may need to know sooner if a
3349 `return' is OK. */
3352 ix86_can_use_return_insn_p ()
3354 struct ix86_frame frame;
3356 #ifdef NON_SAVING_SETJMP
3357 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3358 return 0;
3359 #endif
3360 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
3361 if (profile_block_flag == 2)
3362 return 0;
3363 #endif
3365 if (! reload_completed || frame_pointer_needed)
3366 return 0;
3368 /* Don't allow more than 32767 bytes of pop, since that's all we can do
3369 with one instruction. */
3370 if (current_function_pops_args
3371 && current_function_args_size >= 32768)
3372 return 0;
3374 ix86_compute_frame_layout (&frame);
3375 return frame.to_allocate == 0 && frame.nregs == 0;
3378 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3380 x86_64_sign_extended_value (value)
3381 rtx value;
3383 switch (GET_CODE (value))
3385 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3386 to be at least 32 and thus all acceptable constants are
3387 represented as CONST_INT. */
3388 case CONST_INT:
3389 if (HOST_BITS_PER_WIDE_INT == 32)
3390 return 1;
3391 else
3393 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3394 return trunc_int_for_mode (val, SImode) == val;
3396 break;
3398 /* For certain code models, the symbolic references are known to fit. */
3399 case SYMBOL_REF:
3400 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3402 /* For certain code models, the code is near as well. */
3403 case LABEL_REF:
3404 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3406 /* We also may accept the offsetted memory references in certain special
3407 cases. */
3408 case CONST:
3409 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3410 && XVECLEN (XEXP (value, 0), 0) == 1
3411 && XINT (XEXP (value, 0), 1) == 15)
3412 return 1;
3413 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3415 rtx op1 = XEXP (XEXP (value, 0), 0);
3416 rtx op2 = XEXP (XEXP (value, 0), 1);
3417 HOST_WIDE_INT offset;
3419 if (ix86_cmodel == CM_LARGE)
3420 return 0;
3421 if (GET_CODE (op2) != CONST_INT)
3422 return 0;
3423 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3424 switch (GET_CODE (op1))
3426 case SYMBOL_REF:
3427 /* For CM_SMALL, assume that the latest object is 1MB before
3428 the end of the 31-bit boundary. We may also accept pretty
3429 large negative constants, knowing that all objects are
3430 in the positive half of the address space. */
3431 if (ix86_cmodel == CM_SMALL
3432 && offset < 1024*1024*1024
3433 && trunc_int_for_mode (offset, SImode) == offset)
3434 return 1;
3435 /* For CM_KERNEL we know that all objects reside in the
3436 negative half of the 32-bit address space. We may not
3437 accept negative offsets, since they may be just off,
3438 and we may accept pretty large positive ones. */
3439 if (ix86_cmodel == CM_KERNEL
3440 && offset > 0
3441 && trunc_int_for_mode (offset, SImode) == offset)
3442 return 1;
3443 break;
3444 case LABEL_REF:
3445 /* These conditions are similar to SYMBOL_REF ones, just the
3446 constraints for code models differ. */
3447 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3448 && offset < 1024*1024*1024
3449 && trunc_int_for_mode (offset, SImode) == offset)
3450 return 1;
3451 if (ix86_cmodel == CM_KERNEL
3452 && offset > 0
3453 && trunc_int_for_mode (offset, SImode) == offset)
3454 return 1;
3455 break;
3456 default:
3457 return 0;
3460 return 0;
3461 default:
3462 return 0;
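/* The CONST_INT case above reduces to this check; a standalone sketch
   (assumes a 32-bit int and a 64-bit long long):  */

static int
example_fits_signed32 (v)
     long long v;
{
  /* True iff sign extending the low 32 bits reproduces V;
     e.g. -1 fits, but 0x80000000LL does not.  */
  return (long long) (int) v == v;
}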
3466 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3468 x86_64_zero_extended_value (value)
3469 rtx value;
3471 switch (GET_CODE (value))
3473 case CONST_DOUBLE:
3474 if (HOST_BITS_PER_WIDE_INT == 32)
3475 return (GET_MODE (value) == VOIDmode
3476 && !CONST_DOUBLE_HIGH (value));
3477 else
3478 return 0;
3479 case CONST_INT:
3480 if (HOST_BITS_PER_WIDE_INT == 32)
3481 return INTVAL (value) >= 0;
3482 else
3483 return !(INTVAL (value) & ~(HOST_WIDE_INT)0xffffffff);
3484 break;
3486 /* For certain code models, the symbolic references are known to fit. */
3487 case SYMBOL_REF:
3488 return ix86_cmodel == CM_SMALL;
3490 /* For certain code models, the code is near as well. */
3491 case LABEL_REF:
3492 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3494 /* We also may accept the offsetted memory references in certain special
3495 cases. */
3496 case CONST:
3497 if (GET_CODE (XEXP (value, 0)) == PLUS)
3499 rtx op1 = XEXP (XEXP (value, 0), 0);
3500 rtx op2 = XEXP (XEXP (value, 0), 1);
3502 if (ix86_cmodel == CM_LARGE)
3503 return 0;
3504 switch (GET_CODE (op1))
3506 case SYMBOL_REF:
3507 return 0;
3508 /* For the small code model we may accept pretty large positive
3509 offsets, since one bit is available for free. Negative
3510 offsets are limited by the size of the NULL pointer area
3511 specified by the ABI. */
3512 if (ix86_cmodel == CM_SMALL
3513 && GET_CODE (op2) == CONST_INT
3514 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3515 && (trunc_int_for_mode (INTVAL (op2), SImode)
3516 == INTVAL (op2)))
3517 return 1;
3518 /* ??? For the kernel, we may accept an adjustment of
3519 -0x10000000, since we know that it will just convert
3520 negative address space to positive, but perhaps this
3521 is not worthwhile. */
3522 break;
3523 case LABEL_REF:
3524 /* These conditions are similar to SYMBOL_REF ones, just the
3525 constraints for code models differ. */
3526 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3527 && GET_CODE (op2) == CONST_INT
3528 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3529 && (trunc_int_for_mode (INTVAL (op2), SImode)
3530 == INTVAL (op2)))
3531 return 1;
3532 break;
3533 default:
3534 return 0;
3537 return 0;
3538 default:
3539 return 0;
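/* Likewise for the zero-extended case; a standalone sketch under the
   same assumptions:  */

static int
example_fits_unsigned32 (v)
     long long v;
{
  /* True iff no bits above bit 31 are set; e.g. 0xffffffffLL fits,
     while -1 and 0x100000000LL do not.  */
  return (v & ~(long long) 0xffffffffLL) == 0;
}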
3543 /* Value should be nonzero if functions must have frame pointers.
3544 Zero means the frame pointer need not be set up (and parms may
3545 be accessed via the stack pointer) in functions that seem suitable. */
3548 ix86_frame_pointer_required ()
3550 /* If we accessed previous frames, then the generated code expects
3551 to be able to access the saved ebp value in our frame. */
3552 if (cfun->machine->accesses_prev_frame)
3553 return 1;
3555 /* Several x86 OSes need a frame pointer for other reasons,
3556 usually pertaining to setjmp. */
3557 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3558 return 1;
3560 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3561 the frame pointer by default. Turn it back on now if we've not
3562 got a leaf function. */
3563 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3564 return 1;
3566 return 0;
3569 /* Record that the current function accesses previous call frames. */
3571 void
3572 ix86_setup_frame_addresses ()
3574 cfun->machine->accesses_prev_frame = 1;
3577 static char pic_label_name[32];
3579 /* This function generates code for -fpic that loads %ebx with
3580 the return address of the caller and then returns. */
3582 void
3583 ix86_asm_file_end (file)
3584 FILE *file;
3586 rtx xops[2];
3588 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
3589 return;
3591 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
3592 to updating relocations to a section being discarded such that this
3593 doesn't work. Ought to detect this at configure time. */
3594 #if 0
3595 /* The trick here is to create a linkonce section containing the
3596 pic label thunk, but to refer to it with an internal label.
3597 Because the label is internal, we don't have inter-dso name
3598 binding issues on hosts that don't support ".hidden".
3600 In order to use these macros, however, we must create a fake
3601 function decl. */
3602 if (targetm.have_named_sections)
3604 tree decl = build_decl (FUNCTION_DECL,
3605 get_identifier ("i686.get_pc_thunk"),
3606 error_mark_node);
3607 DECL_ONE_ONLY (decl) = 1;
3608 UNIQUE_SECTION (decl, 0);
3609 named_section (decl, NULL);
3611 else
3612 #else
3613 text_section ();
3614 #endif
3616 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3617 internal (non-global) label that's being emitted, it didn't make
3618 sense to have .type information for local labels. This caused
3619 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3620 me debug info for a label that you're declaring non-global?), so this
3621 was changed to call ASM_OUTPUT_LABEL() instead. */
3623 ASM_OUTPUT_LABEL (file, pic_label_name);
3625 xops[0] = pic_offset_table_rtx;
3626 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3627 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3628 output_asm_insn ("ret", xops);
3631 void
3632 load_pic_register ()
3634 rtx gotsym, pclab;
3636 if (TARGET_64BIT)
3637 abort();
3639 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3641 if (TARGET_DEEP_BRANCH_PREDICTION)
3643 if (! pic_label_name[0])
3644 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
3645 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
3647 else
3649 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
3652 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
3654 if (! TARGET_DEEP_BRANCH_PREDICTION)
3655 emit_insn (gen_popsi1 (pic_offset_table_rtx));
3657 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
3660 /* Generate a "push" pattern for input ARG. */
3662 static rtx
3663 gen_push (arg)
3664 rtx arg;
3666 return gen_rtx_SET (VOIDmode,
3667 gen_rtx_MEM (Pmode,
3668 gen_rtx_PRE_DEC (Pmode,
3669 stack_pointer_rtx)),
3670 arg);
3673 /* Return 1 if we need to save REGNO. */
3674 static int
3675 ix86_save_reg (regno, maybe_eh_return)
3676 int regno;
3677 int maybe_eh_return;
3679 if (flag_pic
3680 && ! TARGET_64BIT
3681 && regno == PIC_OFFSET_TABLE_REGNUM
3682 && (current_function_uses_pic_offset_table
3683 || current_function_uses_const_pool
3684 || current_function_calls_eh_return))
3685 return 1;
3687 if (current_function_calls_eh_return && maybe_eh_return)
3689 unsigned i;
3690 for (i = 0; ; i++)
3692 unsigned test = EH_RETURN_DATA_REGNO(i);
3693 if (test == INVALID_REGNUM)
3694 break;
3695 if (test == (unsigned) regno)
3696 return 1;
3700 return (regs_ever_live[regno]
3701 && !call_used_regs[regno]
3702 && !fixed_regs[regno]
3703 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3706 /* Return number of registers to be saved on the stack. */
3708 static int
3709 ix86_nsaved_regs ()
3711 int nregs = 0;
3712 int regno;
3714 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3715 if (ix86_save_reg (regno, true))
3716 nregs++;
3717 return nregs;
3720 /* Return the offset between two registers, one to be eliminated, and the other
3721 its replacement, at the start of a routine. */
3723 HOST_WIDE_INT
3724 ix86_initial_elimination_offset (from, to)
3725 int from;
3726 int to;
3728 struct ix86_frame frame;
3729 ix86_compute_frame_layout (&frame);
3731 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3732 return frame.hard_frame_pointer_offset;
3733 else if (from == FRAME_POINTER_REGNUM
3734 && to == HARD_FRAME_POINTER_REGNUM)
3735 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3736 else
3738 if (to != STACK_POINTER_REGNUM)
3739 abort ();
3740 else if (from == ARG_POINTER_REGNUM)
3741 return frame.stack_pointer_offset;
3742 else if (from != FRAME_POINTER_REGNUM)
3743 abort ();
3744 else
3745 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3749 /* Fill the ix86_frame structure describing the frame of the current function. */
3751 static void
3752 ix86_compute_frame_layout (frame)
3753 struct ix86_frame *frame;
3755 HOST_WIDE_INT total_size;
3756 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3757 int offset;
3758 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
3759 HOST_WIDE_INT size = get_frame_size ();
3761 frame->nregs = ix86_nsaved_regs ();
3762 total_size = size;
3764 /* Skip the return address and the saved base pointer. */
3765 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
3767 frame->hard_frame_pointer_offset = offset;
3769 /* Do some sanity checking of stack_alignment_needed and
3770 preferred_alignment, since the i386 port is the only one using these
3771 features, and they may break easily. */
3773 if (size && !stack_alignment_needed)
3774 abort ();
3775 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
3776 abort ();
3777 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3778 abort ();
3779 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3780 abort ();
3782 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
3783 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
3785 /* Register save area */
3786 offset += frame->nregs * UNITS_PER_WORD;
3788 /* Va-arg area */
3789 if (ix86_save_varrargs_registers)
3791 offset += X86_64_VARARGS_SIZE;
3792 frame->va_arg_size = X86_64_VARARGS_SIZE;
3794 else
3795 frame->va_arg_size = 0;
3797 /* Align start of frame for local function. */
3798 frame->padding1 = ((offset + stack_alignment_needed - 1)
3799 & -stack_alignment_needed) - offset;
3801 offset += frame->padding1;
3803 /* Frame pointer points here. */
3804 frame->frame_pointer_offset = offset;
3806 offset += size;
3808 /* Add outgoing arguments area. */
3809 if (ACCUMULATE_OUTGOING_ARGS)
3811 offset += current_function_outgoing_args_size;
3812 frame->outgoing_arguments_size = current_function_outgoing_args_size;
3814 else
3815 frame->outgoing_arguments_size = 0;
3817 /* Align stack boundary. */
3818 frame->padding2 = ((offset + preferred_alignment - 1)
3819 & -preferred_alignment) - offset;
3821 offset += frame->padding2;
3823 /* We've reached the end of the stack frame. */
3824 frame->stack_pointer_offset = offset;
3826 /* Size the prologue needs to allocate. */
3827 frame->to_allocate =
3828 (size + frame->padding1 + frame->padding2
3829 + frame->outgoing_arguments_size + frame->va_arg_size);
3831 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
3832 && current_function_is_leaf)
3834 frame->red_zone_size = frame->to_allocate;
3835 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
3836 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
3838 else
3839 frame->red_zone_size = 0;
3840 frame->to_allocate -= frame->red_zone_size;
3841 frame->stack_pointer_offset -= frame->red_zone_size;
3842 #if 0
3843 fprintf (stderr, "nregs: %i\n", frame->nregs);
3844 fprintf (stderr, "size: %i\n", size);
3845 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
3846 fprintf (stderr, "padding1: %i\n", frame->padding1);
3847 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
3848 fprintf (stderr, "padding2: %i\n", frame->padding2);
3849 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
3850 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
3851 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
3852 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
3853 frame->hard_frame_pointer_offset);
3854 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
3855 #endif
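/* Editorial sketch, not part of the original file: the two padding
   computations above round OFFSET up to the next multiple of a
   power-of-two alignment.  For example, with offset == 20 and
   alignment == 16:
     ((20 + 16 - 1) & -16) - 20 == (35 & ~15) - 20 == 32 - 20 == 12,
   so 12 bytes of padding bring the frame to a 16-byte boundary.  */
#if 0
static HOST_WIDE_INT
round_up_padding_example (HOST_WIDE_INT offset, HOST_WIDE_INT align)
{
  /* align must be a power of two; -align equals the mask ~(align - 1).  */
  return ((offset + align - 1) & -align) - offset;
}
#endif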
3858 /* Emit code to save registers in the prologue. */
3860 static void
3861 ix86_emit_save_regs ()
3863 register int regno;
3864 rtx insn;
3866 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3867 if (ix86_save_reg (regno, true))
3869 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
3870 RTX_FRAME_RELATED_P (insn) = 1;
3874 /* Emit code to save registers using MOV insns. First register
3875 is saved at POINTER + OFFSET. */
3876 static void
3877 ix86_emit_save_regs_using_mov (pointer, offset)
3878 rtx pointer;
3879 HOST_WIDE_INT offset;
3881 int regno;
3882 rtx insn;
3884 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3885 if (ix86_save_reg (regno, true))
3887 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
3888 Pmode, offset),
3889 gen_rtx_REG (Pmode, regno));
3890 RTX_FRAME_RELATED_P (insn) = 1;
3891 offset += UNITS_PER_WORD;
3895 /* Expand the prologue into a bunch of separate insns. */
3897 void
3898 ix86_expand_prologue ()
3900 rtx insn;
3901 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
3902 || current_function_uses_const_pool)
3903 && !TARGET_64BIT);
3904 struct ix86_frame frame;
3905 int use_mov = 0;
3906 HOST_WIDE_INT allocate;
3908 if (!optimize_size)
3910 use_fast_prologue_epilogue
3911 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
3912 if (TARGET_PROLOGUE_USING_MOVE)
3913 use_mov = use_fast_prologue_epilogue;
3915 ix86_compute_frame_layout (&frame);
3917 /* Note: AT&T enter does NOT have reversed args. Enter is probably
3918 slower on all targets. Also sdb doesn't like it. */
3920 if (frame_pointer_needed)
3922 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
3923 RTX_FRAME_RELATED_P (insn) = 1;
3925 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3926 RTX_FRAME_RELATED_P (insn) = 1;
3929 allocate = frame.to_allocate;
3930 /* When we are dealing with only a single register and an empty frame,
3931 a push is equivalent to the mov+add sequence. */
3932 if (allocate == 0 && frame.nregs <= 1)
3933 use_mov = 0;
3935 if (!use_mov)
3936 ix86_emit_save_regs ();
3937 else
3938 allocate += frame.nregs * UNITS_PER_WORD;
3940 if (allocate == 0)
3942 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
3944 insn = emit_insn (gen_pro_epilogue_adjust_stack
3945 (stack_pointer_rtx, stack_pointer_rtx,
3946 GEN_INT (-allocate)));
3947 RTX_FRAME_RELATED_P (insn) = 1;
3949 else
3951 /* ??? Is this only valid for Win32? */
3953 rtx arg0, sym;
3955 if (TARGET_64BIT)
3956 abort ();
3958 arg0 = gen_rtx_REG (SImode, 0);
3959 emit_move_insn (arg0, GEN_INT (allocate));
3961 sym = gen_rtx_MEM (FUNCTION_MODE,
3962 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
3963 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
3965 CALL_INSN_FUNCTION_USAGE (insn)
3966 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
3967 CALL_INSN_FUNCTION_USAGE (insn));
3969 if (use_mov)
3971 if (!frame_pointer_needed || !frame.to_allocate)
3972 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
3973 else
3974 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
3975 -frame.nregs * UNITS_PER_WORD);
3978 #ifdef SUBTARGET_PROLOGUE
3979 SUBTARGET_PROLOGUE;
3980 #endif
3982 if (pic_reg_used)
3983 load_pic_register ();
3985 /* If we are profiling, make sure no instructions are scheduled before
3986 the call to mcount. However, if -fpic, the above call will have
3987 done that. */
3988 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
3989 emit_insn (gen_blockage ());
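/* Editorial sketch, not part of the original file: for a
   frame-pointer-using function that saves one register and needs a
   24-byte frame, the insns emitted above correspond roughly to the
   following ia32 assembly (AT&T syntax, push-based save path):

        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ebx
        subl    $24, %esp

   With TARGET_PROLOGUE_USING_MOVE, the register save instead becomes a
   mov into the already-allocated frame.  */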
3992 /* Emit code to restore saved registers using MOV insns. First register
3993 is restored from POINTER + OFFSET. */
3994 static void
3995 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
3996 rtx pointer;
3997 int offset;
3998 int maybe_eh_return;
4000 int regno;
4002 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4003 if (ix86_save_reg (regno, maybe_eh_return))
4005 emit_move_insn (gen_rtx_REG (Pmode, regno),
4006 adjust_address (gen_rtx_MEM (Pmode, pointer),
4007 Pmode, offset));
4008 offset += UNITS_PER_WORD;
4012 /* Restore function stack, frame, and registers. */
4014 void
4015 ix86_expand_epilogue (style)
4016 int style;
4018 int regno;
4019 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4020 struct ix86_frame frame;
4021 HOST_WIDE_INT offset;
4023 ix86_compute_frame_layout (&frame);
4025 /* Calculate start of saved registers relative to ebp. Special care
4026 must be taken for the normal return case of a function using
4027 eh_return: the eax and edx registers are marked as saved, but not
4028 restored along this path. */
4029 offset = frame.nregs;
4030 if (current_function_calls_eh_return && style != 2)
4031 offset -= 2;
4032 offset *= -UNITS_PER_WORD;
4034 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
4035 if (profile_block_flag == 2)
4037 FUNCTION_BLOCK_PROFILER_EXIT;
4039 #endif
4041 /* If we're only restoring one register and sp is not valid, then
4042 use a move instruction to restore the register, since it's
4043 less work than reloading sp and popping the register.
4045 The default code results in a stack adjustment using an add/lea
4046 instruction, while this code results in a LEAVE instruction (or discrete
4047 equivalent), so it is profitable in some other cases as well, especially
4048 when there are no registers to restore. We also use this code when
4049 TARGET_USE_LEAVE is set and there is exactly one register to pop. This
4050 heuristic may need some tuning in the future. */
4051 if ((!sp_valid && frame.nregs <= 1)
4052 || (TARGET_EPILOGUE_USING_MOVE
4053 && use_fast_prologue_epilogue
4054 && (frame.nregs > 1 || frame.to_allocate))
4055 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4056 || (frame_pointer_needed && TARGET_USE_LEAVE
4057 && use_fast_prologue_epilogue && frame.nregs == 1)
4058 || current_function_calls_eh_return)
4060 /* Restore registers. We can use ebp or esp to address the memory
4061 locations. If both are available, default to ebp, since offsets
4062 are known to be small. The only exception is esp pointing directly
4063 to the end of the block of saved registers, where we may simplify
4064 the addressing mode. */
4066 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4067 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4068 frame.to_allocate, style == 2);
4069 else
4070 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4071 offset, style == 2);
4073 /* eh_return epilogues need %ecx added to the stack pointer. */
4074 if (style == 2)
4076 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4078 if (frame_pointer_needed)
4080 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4081 tmp = plus_constant (tmp, UNITS_PER_WORD);
4082 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4084 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4085 emit_move_insn (hard_frame_pointer_rtx, tmp);
4087 emit_insn (gen_pro_epilogue_adjust_stack
4088 (stack_pointer_rtx, sa, const0_rtx));
4090 else
4092 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4093 tmp = plus_constant (tmp, (frame.to_allocate
4094 + frame.nregs * UNITS_PER_WORD));
4095 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4098 else if (!frame_pointer_needed)
4099 emit_insn (gen_pro_epilogue_adjust_stack
4100 (stack_pointer_rtx, stack_pointer_rtx,
4101 GEN_INT (frame.to_allocate
4102 + frame.nregs * UNITS_PER_WORD)));
4103 /* If not an i386, mov & pop is faster than "leave". */
4104 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4105 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4106 else
4108 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4109 hard_frame_pointer_rtx,
4110 const0_rtx));
4111 if (TARGET_64BIT)
4112 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4113 else
4114 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4117 else
4119 /* First step is to deallocate the stack frame so that we can
4120 pop the registers. */
4121 if (!sp_valid)
4123 if (!frame_pointer_needed)
4124 abort ();
4125 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4126 hard_frame_pointer_rtx,
4127 GEN_INT (offset)));
4129 else if (frame.to_allocate)
4130 emit_insn (gen_pro_epilogue_adjust_stack
4131 (stack_pointer_rtx, stack_pointer_rtx,
4132 GEN_INT (frame.to_allocate)));
4134 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4135 if (ix86_save_reg (regno, false))
4137 if (TARGET_64BIT)
4138 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4139 else
4140 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4142 if (frame_pointer_needed)
4144 /* Leave results in shorter dependency chains on CPUs that are
4145 able to grok it fast. */
4146 if (TARGET_USE_LEAVE)
4147 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4148 else if (TARGET_64BIT)
4149 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4150 else
4151 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4155 /* Sibcall epilogues don't want a return instruction. */
4156 if (style == 0)
4157 return;
4159 if (current_function_pops_args && current_function_args_size)
4161 rtx popc = GEN_INT (current_function_pops_args);
4163 /* The i386 can only pop 64K bytes. If asked to pop more, pop the
4164 return address, do an explicit add, and jump indirectly to the
4165 caller. */
4167 if (current_function_pops_args >= 65536)
4169 rtx ecx = gen_rtx_REG (SImode, 2);
4171 /* There is no "pascal" calling convention in the 64-bit ABI. */
4172 if (TARGET_64BIT)
4173 abort ();
4175 emit_insn (gen_popsi1 (ecx));
4176 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4177 emit_jump_insn (gen_return_indirect_internal (ecx));
4179 else
4180 emit_jump_insn (gen_return_pop_internal (popc));
4182 else
4183 emit_jump_insn (gen_return_internal ());
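/* Editorial sketch, not part of the original file: the two main
   frame-pointer epilogue shapes chosen above correspond roughly to
   (AT&T syntax):

        with TARGET_USE_LEAVE:        discrete equivalent:
            leave                         movl  %ebp, %esp
            ret                           popl  %ebp
                                          ret
*/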
4186 /* Extract the parts of an RTL expression that is a valid memory address
4187 for an instruction. Return false if the structure of the address is
4188 grossly off. */
4190 static int
4191 ix86_decompose_address (addr, out)
4192 register rtx addr;
4193 struct ix86_address *out;
4195 rtx base = NULL_RTX;
4196 rtx index = NULL_RTX;
4197 rtx disp = NULL_RTX;
4198 HOST_WIDE_INT scale = 1;
4199 rtx scale_rtx = NULL_RTX;
4201 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4202 base = addr;
4203 else if (GET_CODE (addr) == PLUS)
4205 rtx op0 = XEXP (addr, 0);
4206 rtx op1 = XEXP (addr, 1);
4207 enum rtx_code code0 = GET_CODE (op0);
4208 enum rtx_code code1 = GET_CODE (op1);
4210 if (code0 == REG || code0 == SUBREG)
4212 if (code1 == REG || code1 == SUBREG)
4213 index = op0, base = op1; /* index + base */
4214 else
4215 base = op0, disp = op1; /* base + displacement */
4217 else if (code0 == MULT)
4219 index = XEXP (op0, 0);
4220 scale_rtx = XEXP (op0, 1);
4221 if (code1 == REG || code1 == SUBREG)
4222 base = op1; /* index*scale + base */
4223 else
4224 disp = op1; /* index*scale + disp */
4226 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4228 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4229 scale_rtx = XEXP (XEXP (op0, 0), 1);
4230 base = XEXP (op0, 1);
4231 disp = op1;
4233 else if (code0 == PLUS)
4235 index = XEXP (op0, 0); /* index + base + disp */
4236 base = XEXP (op0, 1);
4237 disp = op1;
4239 else
4240 return FALSE;
4242 else if (GET_CODE (addr) == MULT)
4244 index = XEXP (addr, 0); /* index*scale */
4245 scale_rtx = XEXP (addr, 1);
4247 else if (GET_CODE (addr) == ASHIFT)
4249 rtx tmp;
4251 /* We're called for lea too, which implements ashift on occasion. */
4252 index = XEXP (addr, 0);
4253 tmp = XEXP (addr, 1);
4254 if (GET_CODE (tmp) != CONST_INT)
4255 return FALSE;
4256 scale = INTVAL (tmp);
4257 if ((unsigned HOST_WIDE_INT) scale > 3)
4258 return FALSE;
4259 scale = 1 << scale;
4261 else
4262 disp = addr; /* displacement */
4264 /* Extract the integral value of scale. */
4265 if (scale_rtx)
4267 if (GET_CODE (scale_rtx) != CONST_INT)
4268 return FALSE;
4269 scale = INTVAL (scale_rtx);
4272 /* Allow arg pointer and stack pointer as index if there is no scaling. */
4273 if (base && index && scale == 1
4274 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4275 || index == stack_pointer_rtx))
4277 rtx tmp = base;
4278 base = index;
4279 index = tmp;
4282 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4283 if ((base == hard_frame_pointer_rtx
4284 || base == frame_pointer_rtx
4285 || base == arg_pointer_rtx) && !disp)
4286 disp = const0_rtx;
4288 /* Special case: on K6, [%esi] causes the instruction to be vector
4289 decoded. Avoid this by transforming it to [%esi+0]. */
4290 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4291 && base && !index && !disp
4292 && REG_P (base)
4293 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4294 disp = const0_rtx;
4296 /* Special case: encode reg+reg instead of reg*2. */
4297 if (!base && index && scale && scale == 2)
4298 base = index, scale = 1;
4300 /* Special case: scaling cannot be encoded without base or displacement. */
4301 if (!base && !disp && index && scale != 1)
4302 disp = const0_rtx;
4304 out->base = base;
4305 out->index = index;
4306 out->disp = disp;
4307 out->scale = scale;
4309 return TRUE;
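/* Editorial sketch, not part of the original file: decomposing the
   address of "movl 12(%ebx,%ecx,4), %eax".  Hard register numbers 2
   (ecx) and 3 (ebx) are assumptions, following the numbering used
   elsewhere in this file (e.g. regno 2 for ecx in the epilogue).  */
#if 0
struct ix86_address parts;
rtx addr = gen_rtx_PLUS (SImode,
                         gen_rtx_PLUS (SImode,
                                       gen_rtx_MULT (SImode,
                                                     gen_rtx_REG (SImode, 2),
                                                     GEN_INT (4)),
                                       gen_rtx_REG (SImode, 3)),
                         GEN_INT (12));
if (ix86_decompose_address (addr, &parts))
  /* parts.base == ebx, parts.index == ecx, parts.scale == 4,
     parts.disp == (const_int 12).  */
  ;
#endif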
4312 /* Return the cost of the memory address x.
4313 For i386, it is better to use a complex address than let gcc copy
4314 the address into a reg and make a new pseudo. But not if the address
4315 requires two regs - that would mean more pseudos with longer
4316 lifetimes. */
4317 int
4318 ix86_address_cost (x)
4319 rtx x;
4321 struct ix86_address parts;
4322 int cost = 1;
4324 if (!ix86_decompose_address (x, &parts))
4325 abort ();
4327 /* More complex memory references are better. */
4328 if (parts.disp && parts.disp != const0_rtx)
4329 cost--;
4331 /* Attempt to minimize number of registers in the address. */
4332 if ((parts.base
4333 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4334 || (parts.index
4335 && (!REG_P (parts.index)
4336 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4337 cost++;
4339 if (parts.base
4340 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4341 && parts.index
4342 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4343 && parts.base != parts.index)
4344 cost++;
4346 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4347 since its predecode logic can't detect the length of such instructions
4348 and they degenerate to vector decoding. Increase the cost of such
4349 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4350 to split such addresses or even to refuse them altogether.
4352 The following addressing modes are affected:
4353 [base+scale*index]
4354 [scale*index+disp]
4355 [base+index]
4357 The first and last case may be avoidable by explicitly coding the zero
4358 into the memory address, but I don't have an AMD-K6 machine handy to
4359 check this theory. */
4361 if (TARGET_K6
4362 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4363 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4364 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4365 cost += 10;
4367 return cost;
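/* Editorial worked example, not part of the original file: for
   (plus (reg pseudo) (const_int 4)) the cost is 1, minus 1 for the
   non-zero displacement, plus 1 for the pseudo base: 1 total.  For
   (plus (reg pseudo1) (reg pseudo2)) it is 1 + 1 + 1 == 3, steering
   gcc away from addresses that tie up two registers.  */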
4370 /* If X is a machine specific address (i.e. a symbol or label being
4371 referenced as a displacement from the GOT implemented using an
4372 UNSPEC), then return the base term. Otherwise return X. */
4374 rtx
4375 ix86_find_base_term (x)
4376 rtx x;
4378 rtx term;
4380 if (TARGET_64BIT)
4382 if (GET_CODE (x) != CONST)
4383 return x;
4384 term = XEXP (x, 0);
4385 if (GET_CODE (term) == PLUS
4386 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4387 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4388 term = XEXP (term, 0);
4389 if (GET_CODE (term) != UNSPEC
4390 || XVECLEN (term, 0) != 1
4391 || XINT (term, 1) != 15)
4392 return x;
4394 term = XVECEXP (term, 0, 0);
4396 if (GET_CODE (term) != SYMBOL_REF
4397 && GET_CODE (term) != LABEL_REF)
4398 return x;
4400 return term;
4403 if (GET_CODE (x) != PLUS
4404 || XEXP (x, 0) != pic_offset_table_rtx
4405 || GET_CODE (XEXP (x, 1)) != CONST)
4406 return x;
4408 term = XEXP (XEXP (x, 1), 0);
4410 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4411 term = XEXP (term, 0);
4413 if (GET_CODE (term) != UNSPEC
4414 || XVECLEN (term, 0) != 1
4415 || XINT (term, 1) != 7)
4416 return x;
4418 term = XVECEXP (term, 0, 0);
4420 if (GET_CODE (term) != SYMBOL_REF
4421 && GET_CODE (term) != LABEL_REF)
4422 return x;
4424 return term;
4427 /* Determine if a given CONST RTX is a valid memory displacement
4428 in PIC mode. */
4430 int
4431 legitimate_pic_address_disp_p (disp)
4432 register rtx disp;
4434 /* In 64bit mode we can allow direct addresses of symbols and labels
4435 when they are not dynamic symbols. */
4436 if (TARGET_64BIT)
4438 rtx x = disp;
4439 if (GET_CODE (disp) == CONST)
4440 x = XEXP (disp, 0);
4441 /* ??? Handle PIC code models */
4442 if (GET_CODE (x) == PLUS
4443 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4444 && ix86_cmodel == CM_SMALL_PIC
4445 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4446 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4447 x = XEXP (x, 0);
4448 if (local_symbolic_operand (x, Pmode))
4449 return 1;
4451 if (GET_CODE (disp) != CONST)
4452 return 0;
4453 disp = XEXP (disp, 0);
4455 if (TARGET_64BIT)
4457 /* It is unsafe for us to allow PLUS expressions; this limits the allowed
4458 distance of GOT tables. We should not need these anyway. */
4459 if (GET_CODE (disp) != UNSPEC
4460 || XVECLEN (disp, 0) != 1
4461 || XINT (disp, 1) != 15)
4462 return 0;
4464 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4465 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4466 return 0;
4467 return 1;
4470 if (GET_CODE (disp) == PLUS)
4472 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4473 return 0;
4474 disp = XEXP (disp, 0);
4477 if (GET_CODE (disp) != UNSPEC
4478 || XVECLEN (disp, 0) != 1)
4479 return 0;
4481 /* Must be @GOT or @GOTOFF. */
4482 switch (XINT (disp, 1))
4484 case 6: /* @GOT */
4485 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4487 case 7: /* @GOTOFF */
4488 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4491 return 0;
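/* Editorial note, not part of the original file: the magic unspec
   numbers used throughout this file encode PIC relocations; per
   output_pic_addr_const below, 6 == @GOT, 7 == @GOTOFF, 8 == @PLT and
   15 == @GOTPCREL.  */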
4494 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4495 memory address for an instruction. The MODE argument is the machine mode
4496 for the MEM expression that wants to use this address.
4498 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
4499 convert common non-canonical forms to canonical form so that they will
4500 be recognized. */
4502 int
4503 legitimate_address_p (mode, addr, strict)
4504 enum machine_mode mode;
4505 register rtx addr;
4506 int strict;
4508 struct ix86_address parts;
4509 rtx base, index, disp;
4510 HOST_WIDE_INT scale;
4511 const char *reason = NULL;
4512 rtx reason_rtx = NULL_RTX;
4514 if (TARGET_DEBUG_ADDR)
4516 fprintf (stderr,
4517 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4518 GET_MODE_NAME (mode), strict);
4519 debug_rtx (addr);
4522 if (! ix86_decompose_address (addr, &parts))
4524 reason = "decomposition failed";
4525 goto report_error;
4528 base = parts.base;
4529 index = parts.index;
4530 disp = parts.disp;
4531 scale = parts.scale;
4533 /* Validate base register.
4535 Don't allow SUBREGs here; they can lead to spill failures when the base
4536 is one word out of a two-word structure, which is represented internally
4537 as a DImode int. */
4539 if (base)
4541 reason_rtx = base;
4543 if (GET_CODE (base) != REG)
4545 reason = "base is not a register";
4546 goto report_error;
4549 if (GET_MODE (base) != Pmode)
4551 reason = "base is not in Pmode";
4552 goto report_error;
4555 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
4556 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
4558 reason = "base is not valid";
4559 goto report_error;
4563 /* Validate index register.
4565 Don't allow SUBREGs here; they can lead to spill failures when the index
4566 is one word out of a two-word structure, which is represented internally
4567 as a DImode int. */
4569 if (index)
4571 reason_rtx = index;
4573 if (GET_CODE (index) != REG)
4575 reason = "index is not a register";
4576 goto report_error;
4579 if (GET_MODE (index) != Pmode)
4581 reason = "index is not in Pmode";
4582 goto report_error;
4585 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
4586 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
4588 reason = "index is not valid";
4589 goto report_error;
4593 /* Validate scale factor. */
4594 if (scale != 1)
4596 reason_rtx = GEN_INT (scale);
4597 if (!index)
4599 reason = "scale without index";
4600 goto report_error;
4603 if (scale != 2 && scale != 4 && scale != 8)
4605 reason = "scale is not a valid multiplier";
4606 goto report_error;
4610 /* Validate displacement. */
4611 if (disp)
4613 reason_rtx = disp;
4615 if (!CONSTANT_ADDRESS_P (disp))
4617 reason = "displacement is not constant";
4618 goto report_error;
4621 if (TARGET_64BIT)
4623 if (!x86_64_sign_extended_value (disp))
4625 reason = "displacement is out of range";
4626 goto report_error;
4629 else
4631 if (GET_CODE (disp) == CONST_DOUBLE)
4633 reason = "displacement is a const_double";
4634 goto report_error;
4638 if (flag_pic && SYMBOLIC_CONST (disp))
4640 if (TARGET_64BIT && (index || base))
4642 reason = "non-constant pic memory reference";
4643 goto report_error;
4645 if (! legitimate_pic_address_disp_p (disp))
4647 reason = "displacement is an invalid pic construct";
4648 goto report_error;
4651 /* This code used to verify that a symbolic pic displacement
4652 includes the pic_offset_table_rtx register.
4654 While this is a good idea, unfortunately these constructs may
4655 be created by the "adds using lea" optimization for incorrect
4656 code like:
4658 int a;
4659 int foo(int i)
4661 return *(&a+i);
4664 This code is nonsensical, but results in addressing the
4665 GOT table with a pic_offset_table_rtx base. We can't
4666 just refuse it easily, since it gets matched by the
4667 "addsi3" pattern, which later gets split to lea in the
4668 case the output register differs from the input. While this
4669 could be handled by a separate addsi pattern for this case
4670 that never results in lea, it seems easier and correct to
4671 fix the crash by disabling this test. */
4673 else if (HALF_PIC_P ())
4675 if (! HALF_PIC_ADDRESS_P (disp)
4676 || (base != NULL_RTX || index != NULL_RTX))
4678 reason = "displacement is an invalid half-pic reference";
4679 goto report_error;
4684 /* Everything looks valid. */
4685 if (TARGET_DEBUG_ADDR)
4686 fprintf (stderr, "Success.\n");
4687 return TRUE;
4689 report_error:
4690 if (TARGET_DEBUG_ADDR)
4692 fprintf (stderr, "Error: %s\n", reason);
4693 debug_rtx (reason_rtx);
4695 return FALSE;
4698 /* Return a unique alias set for the GOT. */
4700 static HOST_WIDE_INT
4701 ix86_GOT_alias_set ()
4703 static HOST_WIDE_INT set = -1;
4704 if (set == -1)
4705 set = new_alias_set ();
4706 return set;
4709 /* Return a legitimate reference for ORIG (an address) using the
4710 register REG. If REG is 0, a new pseudo is generated.
4712 There are two types of references that must be handled:
4714 1. Global data references must load the address from the GOT, via
4715 the PIC reg. An insn is emitted to do this load, and the reg is
4716 returned.
4718 2. Static data references, constant pool addresses, and code labels
4719 compute the address as an offset from the GOT, whose base is in
4720 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
4721 differentiate them from global data objects. The returned
4722 address is the PIC reg + an unspec constant.
4724 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
4725 reg also appears in the address. */
4727 rtx
4728 legitimize_pic_address (orig, reg)
4729 rtx orig;
4730 rtx reg;
4732 rtx addr = orig;
4733 rtx new = orig;
4734 rtx base;
4736 if (local_symbolic_operand (addr, Pmode))
4738 /* This symbol may be referenced via a displacement from the PIC
4739 base address (@GOTOFF). */
4741 current_function_uses_pic_offset_table = 1;
4742 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
4743 new = gen_rtx_CONST (Pmode, new);
4744 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4746 if (reg != 0)
4748 emit_move_insn (reg, new);
4749 new = reg;
4752 else if (GET_CODE (addr) == SYMBOL_REF)
4754 /* This symbol must be referenced via a load from the
4755 Global Offset Table (@GOT). */
4757 current_function_uses_pic_offset_table = 1;
4758 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
4759 new = gen_rtx_CONST (Pmode, new);
4760 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4761 new = gen_rtx_MEM (Pmode, new);
4762 RTX_UNCHANGING_P (new) = 1;
4763 set_mem_alias_set (new, ix86_GOT_alias_set ());
4765 if (reg == 0)
4766 reg = gen_reg_rtx (Pmode);
4767 emit_move_insn (reg, new);
4768 new = reg;
4770 else
4772 if (GET_CODE (addr) == CONST)
4774 addr = XEXP (addr, 0);
4775 if (GET_CODE (addr) == UNSPEC)
4777 /* Check that the unspec is one of the ones we generate? */
4779 else if (GET_CODE (addr) != PLUS)
4780 abort ();
4782 if (GET_CODE (addr) == PLUS)
4784 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
4786 /* Check first to see if this is a constant offset from a @GOTOFF
4787 symbol reference. */
4788 if (local_symbolic_operand (op0, Pmode)
4789 && GET_CODE (op1) == CONST_INT)
4791 if (!TARGET_64BIT)
4793 current_function_uses_pic_offset_table = 1;
4794 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
4795 new = gen_rtx_PLUS (Pmode, new, op1);
4796 new = gen_rtx_CONST (Pmode, new);
4797 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4799 if (reg != 0)
4801 emit_move_insn (reg, new);
4802 new = reg;
4805 else
4807 /* ??? We need to limit offsets here. */
4810 else
4812 base = legitimize_pic_address (XEXP (addr, 0), reg);
4813 new = legitimize_pic_address (XEXP (addr, 1),
4814 base == reg ? NULL_RTX : reg);
4816 if (GET_CODE (new) == CONST_INT)
4817 new = plus_constant (base, INTVAL (new));
4818 else
4820 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
4822 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
4823 new = XEXP (new, 1);
4825 new = gen_rtx_PLUS (Pmode, base, new);
4830 return new;
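/* Editorial sketch, not part of the original file: for a local symbol
   "a" the code above yields
     (plus (reg pic) (const (unspec [(symbol_ref "a")] 7)))
   which prints as "a@GOTOFF(%reg)", while a global symbol instead
   becomes the load
     (mem (plus (reg pic) (const (unspec [(symbol_ref "a")] 6))))
   i.e. "a@GOT(%reg)".  */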
4833 /* Try machine-dependent ways of modifying an illegitimate address
4834 to be legitimate. If we find one, return the new, valid address.
4835 This macro is used in only one place: `memory_address' in explow.c.
4837 OLDX is the address as it was before break_out_memory_refs was called.
4838 In some cases it is useful to look at this to decide what needs to be done.
4840 MODE and WIN are passed so that this macro can use
4841 GO_IF_LEGITIMATE_ADDRESS.
4843 It is always safe for this macro to do nothing. It exists to recognize
4844 opportunities to optimize the output.
4846 For the 80386, we handle X+REG by loading X into a register R and
4847 using R+REG. R will go in a general reg and indexing will be used.
4848 However, if REG is a broken-out memory address or multiplication,
4849 nothing needs to be done because REG can certainly go in a general reg.
4851 When -fpic is used, special handling is needed for symbolic references.
4852 See comments by legitimize_pic_address in i386.c for details. */
4854 rtx
4855 legitimize_address (x, oldx, mode)
4856 register rtx x;
4857 register rtx oldx ATTRIBUTE_UNUSED;
4858 enum machine_mode mode;
4860 int changed = 0;
4861 unsigned log;
4863 if (TARGET_DEBUG_ADDR)
4865 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
4866 GET_MODE_NAME (mode));
4867 debug_rtx (x);
4870 if (flag_pic && SYMBOLIC_CONST (x))
4871 return legitimize_pic_address (x, 0);
4873 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
4874 if (GET_CODE (x) == ASHIFT
4875 && GET_CODE (XEXP (x, 1)) == CONST_INT
4876 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
4878 changed = 1;
4879 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
4880 GEN_INT (1 << log));
4883 if (GET_CODE (x) == PLUS)
4885 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
4887 if (GET_CODE (XEXP (x, 0)) == ASHIFT
4888 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4889 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
4891 changed = 1;
4892 XEXP (x, 0) = gen_rtx_MULT (Pmode,
4893 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
4894 GEN_INT (1 << log));
4897 if (GET_CODE (XEXP (x, 1)) == ASHIFT
4898 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4899 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
4901 changed = 1;
4902 XEXP (x, 1) = gen_rtx_MULT (Pmode,
4903 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
4904 GEN_INT (1 << log));
4907 /* Put multiply first if it isn't already. */
4908 if (GET_CODE (XEXP (x, 1)) == MULT)
4910 rtx tmp = XEXP (x, 0);
4911 XEXP (x, 0) = XEXP (x, 1);
4912 XEXP (x, 1) = tmp;
4913 changed = 1;
4916 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
4917 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
4918 created by virtual register instantiation, register elimination, and
4919 similar optimizations. */
4920 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
4922 changed = 1;
4923 x = gen_rtx_PLUS (Pmode,
4924 gen_rtx_PLUS (Pmode, XEXP (x, 0),
4925 XEXP (XEXP (x, 1), 0)),
4926 XEXP (XEXP (x, 1), 1));
4929 /* Canonicalize
4930 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
4931 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
4932 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
4933 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4934 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
4935 && CONSTANT_P (XEXP (x, 1)))
4937 rtx constant;
4938 rtx other = NULL_RTX;
4940 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4942 constant = XEXP (x, 1);
4943 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
4945 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
4947 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
4948 other = XEXP (x, 1);
4950 else
4951 constant = 0;
4953 if (constant)
4955 changed = 1;
4956 x = gen_rtx_PLUS (Pmode,
4957 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
4958 XEXP (XEXP (XEXP (x, 0), 1), 0)),
4959 plus_constant (other, INTVAL (constant)));
4963 if (changed && legitimate_address_p (mode, x, FALSE))
4964 return x;
4966 if (GET_CODE (XEXP (x, 0)) == MULT)
4968 changed = 1;
4969 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
4972 if (GET_CODE (XEXP (x, 1)) == MULT)
4974 changed = 1;
4975 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
4978 if (changed
4979 && GET_CODE (XEXP (x, 1)) == REG
4980 && GET_CODE (XEXP (x, 0)) == REG)
4981 return x;
4983 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
4985 changed = 1;
4986 x = legitimize_pic_address (x, 0);
4989 if (changed && legitimate_address_p (mode, x, FALSE))
4990 return x;
4992 if (GET_CODE (XEXP (x, 0)) == REG)
4994 register rtx temp = gen_reg_rtx (Pmode);
4995 register rtx val = force_operand (XEXP (x, 1), temp);
4996 if (val != temp)
4997 emit_move_insn (temp, val);
4999 XEXP (x, 1) = temp;
5000 return x;
5003 else if (GET_CODE (XEXP (x, 1)) == REG)
5005 register rtx temp = gen_reg_rtx (Pmode);
5006 register rtx val = force_operand (XEXP (x, 0), temp);
5007 if (val != temp)
5008 emit_move_insn (temp, val);
5010 XEXP (x, 0) = temp;
5011 return x;
5015 return x;
5018 /* Print an integer constant expression in assembler syntax. Addition
5019 and subtraction are the only arithmetic that may appear in these
5020 expressions. FILE is the stdio stream to write to, X is the rtx, and
5021 CODE is the operand print code from the output string. */
5023 static void
5024 output_pic_addr_const (file, x, code)
5025 FILE *file;
5026 rtx x;
5027 int code;
5029 char buf[256];
5031 switch (GET_CODE (x))
5033 case PC:
5034 if (flag_pic)
5035 putc ('.', file);
5036 else
5037 abort ();
5038 break;
5040 case SYMBOL_REF:
5041 assemble_name (file, XSTR (x, 0));
5042 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5043 fputs ("@PLT", file);
5044 break;
5046 case LABEL_REF:
5047 x = XEXP (x, 0);
5048 /* FALLTHRU */
5049 case CODE_LABEL:
5050 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5051 assemble_name (asm_out_file, buf);
5052 break;
5054 case CONST_INT:
5055 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5056 break;
5058 case CONST:
5059 /* This used to output parentheses around the expression,
5060 but that does not work on the 386 (either ATT or BSD assembler). */
5061 output_pic_addr_const (file, XEXP (x, 0), code);
5062 break;
5064 case CONST_DOUBLE:
5065 if (GET_MODE (x) == VOIDmode)
5067 /* We can use %d if the number is <32 bits and positive. */
5068 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5069 fprintf (file, "0x%lx%08lx",
5070 (unsigned long) CONST_DOUBLE_HIGH (x),
5071 (unsigned long) CONST_DOUBLE_LOW (x));
5072 else
5073 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5075 else
5076 /* We can't handle floating point constants;
5077 PRINT_OPERAND must handle them. */
5078 output_operand_lossage ("floating constant misused");
5079 break;
5081 case PLUS:
5082 /* Some assemblers need integer constants to appear first. */
5083 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5085 output_pic_addr_const (file, XEXP (x, 0), code);
5086 putc ('+', file);
5087 output_pic_addr_const (file, XEXP (x, 1), code);
5089 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5091 output_pic_addr_const (file, XEXP (x, 1), code);
5092 putc ('+', file);
5093 output_pic_addr_const (file, XEXP (x, 0), code);
5095 else
5096 abort ();
5097 break;
5099 case MINUS:
5100 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
5101 output_pic_addr_const (file, XEXP (x, 0), code);
5102 putc ('-', file);
5103 output_pic_addr_const (file, XEXP (x, 1), code);
5104 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
5105 break;
5107 case UNSPEC:
5108 if (XVECLEN (x, 0) != 1)
5109 abort ();
5110 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5111 switch (XINT (x, 1))
5113 case 6:
5114 fputs ("@GOT", file);
5115 break;
5116 case 7:
5117 fputs ("@GOTOFF", file);
5118 break;
5119 case 8:
5120 fputs ("@PLT", file);
5121 break;
5122 case 15:
5123 fputs ("@GOTPCREL(%RIP)", file);
5124 break;
5125 default:
5126 output_operand_lossage ("invalid UNSPEC as operand");
5127 break;
5129 break;
5131 default:
5132 output_operand_lossage ("invalid expression as operand");
5136 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5137 We need to handle our special PIC relocations. */
5139 void
5140 i386_dwarf_output_addr_const (file, x)
5141 FILE *file;
5142 rtx x;
5144 fprintf (file, "%s", INT_ASM_OP);
5145 if (flag_pic)
5146 output_pic_addr_const (file, x, '\0');
5147 else
5148 output_addr_const (file, x);
5149 fputc ('\n', file);
5152 /* In the name of slightly smaller debug output, and to cater to
5153 general assembler lossage, recognize PIC+GOTOFF and turn it back
5154 into a direct symbol reference. */
5156 rtx
5157 i386_simplify_dwarf_addr (orig_x)
5158 rtx orig_x;
5160 rtx x = orig_x;
5162 if (TARGET_64BIT)
5164 if (GET_CODE (x) != CONST
5165 || GET_CODE (XEXP (x, 0)) != UNSPEC
5166 || XINT (XEXP (x, 0), 1) != 15)
5167 return orig_x;
5168 return XVECEXP (XEXP (x, 0), 0, 0);
5171 if (GET_CODE (x) != PLUS
5172 || GET_CODE (XEXP (x, 0)) != REG
5173 || GET_CODE (XEXP (x, 1)) != CONST)
5174 return orig_x;
5176 x = XEXP (XEXP (x, 1), 0);
5177 if (GET_CODE (x) == UNSPEC
5178 && (XINT (x, 1) == 6
5179 || XINT (x, 1) == 7))
5180 return XVECEXP (x, 0, 0);
5182 if (GET_CODE (x) == PLUS
5183 && GET_CODE (XEXP (x, 0)) == UNSPEC
5184 && GET_CODE (XEXP (x, 1)) == CONST_INT
5185 && (XINT (XEXP (x, 0), 1) == 6
5186 || XINT (XEXP (x, 0), 1) == 7))
5187 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5189 return orig_x;
5192 static void
5193 put_condition_code (code, mode, reverse, fp, file)
5194 enum rtx_code code;
5195 enum machine_mode mode;
5196 int reverse, fp;
5197 FILE *file;
5199 const char *suffix;
5201 if (mode == CCFPmode || mode == CCFPUmode)
5203 enum rtx_code second_code, bypass_code;
5204 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5205 if (bypass_code != NIL || second_code != NIL)
5206 abort ();
5207 code = ix86_fp_compare_code_to_integer (code);
5208 mode = CCmode;
5210 if (reverse)
5211 code = reverse_condition (code);
5213 switch (code)
5215 case EQ:
5216 suffix = "e";
5217 break;
5218 case NE:
5219 suffix = "ne";
5220 break;
5221 case GT:
5222 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
5223 abort ();
5224 suffix = "g";
5225 break;
5226 case GTU:
5227 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
5228 Those same assemblers have the same but opposite lossage on cmov. */
5229 if (mode != CCmode)
5230 abort ();
5231 suffix = fp ? "nbe" : "a";
5232 break;
5233 case LT:
5234 if (mode == CCNOmode || mode == CCGOCmode)
5235 suffix = "s";
5236 else if (mode == CCmode || mode == CCGCmode)
5237 suffix = "l";
5238 else
5239 abort ();
5240 break;
5241 case LTU:
5242 if (mode != CCmode)
5243 abort ();
5244 suffix = "b";
5245 break;
5246 case GE:
5247 if (mode == CCNOmode || mode == CCGOCmode)
5248 suffix = "ns";
5249 else if (mode == CCmode || mode == CCGCmode)
5250 suffix = "ge";
5251 else
5252 abort ();
5253 break;
5254 case GEU:
5255 /* ??? As above. */
5256 if (mode != CCmode)
5257 abort ();
5258 suffix = fp ? "nb" : "ae";
5259 break;
5260 case LE:
5261 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
5262 abort ();
5263 suffix = "le";
5264 break;
5265 case LEU:
5266 if (mode != CCmode)
5267 abort ();
5268 suffix = "be";
5269 break;
5270 case UNORDERED:
5271 suffix = fp ? "u" : "p";
5272 break;
5273 case ORDERED:
5274 suffix = fp ? "nu" : "np";
5275 break;
5276 default:
5277 abort ();
5279 fputs (suffix, file);
5282 void
5283 print_reg (x, code, file)
5284 rtx x;
5285 int code;
5286 FILE *file;
5288 if (REGNO (x) == ARG_POINTER_REGNUM
5289 || REGNO (x) == FRAME_POINTER_REGNUM
5290 || REGNO (x) == FLAGS_REG
5291 || REGNO (x) == FPSR_REG)
5292 abort ();
5294 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
5295 putc ('%', file);
5297 if (code == 'w' || MMX_REG_P (x))
5298 code = 2;
5299 else if (code == 'b')
5300 code = 1;
5301 else if (code == 'k')
5302 code = 4;
5303 else if (code == 'q')
5304 code = 8;
5305 else if (code == 'y')
5306 code = 3;
5307 else if (code == 'h')
5308 code = 0;
5309 else
5310 code = GET_MODE_SIZE (GET_MODE (x));
5312 /* Irritatingly, AMD extended registers use a different naming convention
5313 from the normal registers. */
5314 if (REX_INT_REG_P (x))
5316 if (!TARGET_64BIT)
5317 abort ();
5318 switch (code)
5320 case 0:
5321 error ("Extended registers have no high halves\n");
5322 break;
5323 case 1:
5324 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5325 break;
5326 case 2:
5327 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5328 break;
5329 case 4:
5330 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5331 break;
5332 case 8:
5333 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5334 break;
5335 default:
5336 error ("Unsupported operand size for extended register.\n");
5337 break;
5339 return;
5341 switch (code)
5343 case 3:
5344 if (STACK_TOP_P (x))
5346 fputs ("st(0)", file);
5347 break;
5349 /* FALLTHRU */
5350 case 8:
5351 case 4:
5352 case 12:
5353 if (! ANY_FP_REG_P (x))
5354 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5355 /* FALLTHRU */
5356 case 16:
5357 case 2:
5358 fputs (hi_reg_name[REGNO (x)], file);
5359 break;
5360 case 1:
5361 fputs (qi_reg_name[REGNO (x)], file);
5362 break;
5363 case 0:
5364 fputs (qi_high_reg_name[REGNO (x)], file);
5365 break;
5366 default:
5367 abort ();
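/* Editorial examples, not part of the original file: given (reg:SI 0)
   (i.e. eax) in the AT&T dialect, code 'b' prints "%al", 'h' prints
   "%ah", 'w' prints "%ax", 'k' prints "%eax" and 'q' prints "%rax"
   (the 'r' prefix applies in 64-bit mode only); with no code, the
   width is taken from the operand's mode.  */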
5371 /* Meaning of CODE:
5372 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5373 C -- print opcode suffix for set/cmov insn.
5374 c -- like C, but print reversed condition
5375 F,f -- likewise, but for floating-point.
5376 R -- print the prefix for register names.
5377 z -- print the opcode suffix for the size of the current operand.
5378 * -- print a star (in certain assembler syntax)
5379 A -- print an absolute memory reference.
5380 w -- print the operand as if it's a "word" (HImode) even if it isn't.
5381 s -- print a shift double count, followed by the assembler's argument
5382 delimiter.
5383 b -- print the QImode name of the register for the indicated operand.
5384 %b0 would print %al if operands[0] is reg 0.
5385 w -- likewise, print the HImode name of the register.
5386 k -- likewise, print the SImode name of the register.
5387 q -- likewise, print the DImode name of the register.
5388 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5389 y -- print "st(0)" instead of "st" as a register.
5390 D -- print condition for SSE cmp instruction.
5391 P -- if PIC, print an @PLT suffix.
5392 X -- don't print any sort of PIC '@' suffix for a symbol.
5394 */
5395 void
5396 print_operand (file, x, code)
5397 FILE *file;
5398 rtx x;
5399 int code;
5401 if (code)
5403 switch (code)
5405 case '*':
5406 if (ASSEMBLER_DIALECT == 0)
5407 putc ('*', file);
5408 return;
5410 case 'A':
5411 if (ASSEMBLER_DIALECT == 0)
5412 putc ('*', file);
5413 else if (ASSEMBLER_DIALECT == 1)
5415 /* Intel syntax. For absolute addresses, registers should not
5416 be surrounded by brackets. */
5417 if (GET_CODE (x) != REG)
5419 putc ('[', file);
5420 PRINT_OPERAND (file, x, 0);
5421 putc (']', file);
5422 return;
5426 PRINT_OPERAND (file, x, 0);
5427 return;
5430 case 'L':
5431 if (ASSEMBLER_DIALECT == 0)
5432 putc ('l', file);
5433 return;
5435 case 'W':
5436 if (ASSEMBLER_DIALECT == 0)
5437 putc ('w', file);
5438 return;
5440 case 'B':
5441 if (ASSEMBLER_DIALECT == 0)
5442 putc ('b', file);
5443 return;
5445 case 'Q':
5446 if (ASSEMBLER_DIALECT == 0)
5447 putc ('l', file);
5448 return;
5450 case 'S':
5451 if (ASSEMBLER_DIALECT == 0)
5452 putc ('s', file);
5453 return;
5455 case 'T':
5456 if (ASSEMBLER_DIALECT == 0)
5457 putc ('t', file);
5458 return;
5460 case 'z':
5461 /* 387 opcodes don't get size suffixes if the operands are
5462 registers. */
5464 if (STACK_REG_P (x))
5465 return;
5467 /* The size suffix is derived from the size of the operand. */
5468 switch (GET_MODE_SIZE (GET_MODE (x)))
5470 case 2:
5471 #ifdef HAVE_GAS_FILDS_FISTS
5472 putc ('s', file);
5473 #endif
5474 return;
5476 case 4:
5477 if (GET_MODE (x) == SFmode)
5479 putc ('s', file);
5480 return;
5482 else
5483 putc ('l', file);
5484 return;
5486 case 12:
5487 case 16:
5488 putc ('t', file);
5489 return;
5491 case 8:
5492 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5494 #ifdef GAS_MNEMONICS
5495 putc ('q', file);
5496 #else
5497 putc ('l', file);
5498 putc ('l', file);
5499 #endif
5501 else
5502 putc ('l', file);
5503 return;
5505 default:
5506 abort ();
5509 case 'b':
5510 case 'w':
5511 case 'k':
5512 case 'q':
5513 case 'h':
5514 case 'y':
5515 case 'X':
5516 case 'P':
5517 break;
5519 case 's':
5520 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5522 PRINT_OPERAND (file, x, 0);
5523 putc (',', file);
5525 return;
5527 case 'D':
5528 /* A little bit of brain damage here. The SSE compare instructions
5529 use completely different names for the comparisons than the
5530 fp conditional moves do. */
5531 switch (GET_CODE (x))
5533 case EQ:
5534 case UNEQ:
5535 fputs ("eq", file);
5536 break;
5537 case LT:
5538 case UNLT:
5539 fputs ("lt", file);
5540 break;
5541 case LE:
5542 case UNLE:
5543 fputs ("le", file);
5544 break;
5545 case UNORDERED:
5546 fputs ("unord", file);
5547 break;
5548 case NE:
5549 case LTGT:
5550 fputs ("neq", file);
5551 break;
5552 case UNGE:
5553 case GE:
5554 fputs ("nlt", file);
5555 break;
5556 case UNGT:
5557 case GT:
5558 fputs ("nle", file);
5559 break;
5560 case ORDERED:
5561 fputs ("ord", file);
5562 break;
5563 default:
5564 abort ();
5565 break;
5567 return;
5568 case 'C':
5569 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
5570 return;
5571 case 'F':
5572 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
5573 return;
5575 /* Like above, but reverse condition */
5576 case 'c':
5577 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5578 return;
5579 case 'f':
5580 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
5581 return;
5582 case '+':
5584 rtx x;
5586 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5587 return;
5589 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5590 if (x)
5592 int pred_val = INTVAL (XEXP (x, 0));
5594 if (pred_val < REG_BR_PROB_BASE * 45 / 100
5595 || pred_val > REG_BR_PROB_BASE * 55 / 100)
5597 int taken = pred_val > REG_BR_PROB_BASE / 2;
5598 int cputaken = final_forward_branch_p (current_output_insn) == 0;
5600 /* Emit hints only in the case where the default branch prediction
5601 heuristics would fail. */
5602 if (taken != cputaken)
5604 /* We use the 3e (DS) prefix for taken branches and the
5605 2e (CS) prefix for not-taken branches. */
5606 if (taken)
5607 fputs ("ds ; ", file);
5608 else
5609 fputs ("cs ; ", file);
5613 return;
5615 default:
5617 char str[50];
5618 sprintf (str, "invalid operand code `%c'", code);
5619 output_operand_lossage (str);
5624 if (GET_CODE (x) == REG)
5626 PRINT_REG (x, code, file);
5629 else if (GET_CODE (x) == MEM)
5631 /* No `byte ptr' prefix for call instructions. */
5632 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
5634 const char * size;
5635 switch (GET_MODE_SIZE (GET_MODE (x)))
5637 case 1: size = "BYTE"; break;
5638 case 2: size = "WORD"; break;
5639 case 4: size = "DWORD"; break;
5640 case 8: size = "QWORD"; break;
5641 case 12: size = "XWORD"; break;
5642 case 16: size = "XMMWORD"; break;
5643 default:
5644 abort ();
5647 /* Check for explicit size override (codes 'b', 'w' and 'k') */
5648 if (code == 'b')
5649 size = "BYTE";
5650 else if (code == 'w')
5651 size = "WORD";
5652 else if (code == 'k')
5653 size = "DWORD";
5655 fputs (size, file);
5656 fputs (" PTR ", file);
5659 x = XEXP (x, 0);
5660 if (flag_pic && CONSTANT_ADDRESS_P (x))
5661 output_pic_addr_const (file, x, code);
5662 /* Avoid (%rip) for call operands. */
5663 else if (CONSTANT_ADDRESS_P (x) && code =='P'
5664 && GET_CODE (x) != CONST_INT)
5665 output_addr_const (file, x);
5666 else
5667 output_address (x);
5670 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
5672 REAL_VALUE_TYPE r;
5673 long l;
5675 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5676 REAL_VALUE_TO_TARGET_SINGLE (r, l);
5678 if (ASSEMBLER_DIALECT == 0)
5679 putc ('$', file);
5680 fprintf (file, "0x%lx", l);
5683 /* These float cases don't actually occur as immediate operands. */
5684 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5686 REAL_VALUE_TYPE r;
5687 char dstr[30];
5689 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5690 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5691 fprintf (file, "%s", dstr);
5694 else if (GET_CODE (x) == CONST_DOUBLE
5695 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
5697 REAL_VALUE_TYPE r;
5698 char dstr[30];
5700 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5701 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5702 fprintf (file, "%s", dstr);
5704 else
5706 if (code != 'P')
5708 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
5710 if (ASSEMBLER_DIALECT == 0)
5711 putc ('$', file);
5713 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
5714 || GET_CODE (x) == LABEL_REF)
5716 if (ASSEMBLER_DIALECT == 0)
5717 putc ('$', file);
5718 else
5719 fputs ("OFFSET FLAT:", file);
5722 if (GET_CODE (x) == CONST_INT)
5723 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5724 else if (flag_pic)
5725 output_pic_addr_const (file, x, code);
5726 else
5727 output_addr_const (file, x);
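/* Editorial example, not part of the original file: for a memory
   operand (mem:SI (reg)) the code above prints "DWORD PTR [eax]" in
   the Intel dialect, while the AT&T dialect prints the bare address
   "(%eax)" via output_address.  */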
5731 /* Print a memory operand whose address is ADDR. */
5733 void
5734 print_operand_address (file, addr)
5735 FILE *file;
5736 register rtx addr;
5738 struct ix86_address parts;
5739 rtx base, index, disp;
5740 int scale;
5742 if (! ix86_decompose_address (addr, &parts))
5743 abort ();
5745 base = parts.base;
5746 index = parts.index;
5747 disp = parts.disp;
5748 scale = parts.scale;
5750 if (!base && !index)
5752 /* A displacement-only address requires special attention. */
5754 if (GET_CODE (disp) == CONST_INT)
5756 if (ASSEMBLER_DIALECT != 0)
5758 if (USER_LABEL_PREFIX[0] == 0)
5759 putc ('%', file);
5760 fputs ("ds:", file);
5762 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
5764 else if (flag_pic)
5765 output_pic_addr_const (file, addr, 0);
5766 else
5767 output_addr_const (file, addr);
5769 /* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
5770 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
5771 fputs ("(%rip)", file);
5773 else
5775 if (ASSEMBLER_DIALECT == 0)
5777 if (disp)
5779 if (flag_pic)
5780 output_pic_addr_const (file, disp, 0);
5781 else if (GET_CODE (disp) == LABEL_REF)
5782 output_asm_label (disp);
5783 else
5784 output_addr_const (file, disp);
5787 putc ('(', file);
5788 if (base)
5789 PRINT_REG (base, 0, file);
5790 if (index)
5792 putc (',', file);
5793 PRINT_REG (index, 0, file);
5794 if (scale != 1)
5795 fprintf (file, ",%d", scale);
5797 putc (')', file);
5799 else
5801 rtx offset = NULL_RTX;
5803 if (disp)
5805 /* Pull out the offset of a symbol; print any symbol itself. */
5806 if (GET_CODE (disp) == CONST
5807 && GET_CODE (XEXP (disp, 0)) == PLUS
5808 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
5810 offset = XEXP (XEXP (disp, 0), 1);
5811 disp = gen_rtx_CONST (VOIDmode,
5812 XEXP (XEXP (disp, 0), 0));
5815 if (flag_pic)
5816 output_pic_addr_const (file, disp, 0);
5817 else if (GET_CODE (disp) == LABEL_REF)
5818 output_asm_label (disp);
5819 else if (GET_CODE (disp) == CONST_INT)
5820 offset = disp;
5821 else
5822 output_addr_const (file, disp);
5825 putc ('[', file);
5826 if (base)
5828 PRINT_REG (base, 0, file);
5829 if (offset)
5831 if (INTVAL (offset) >= 0)
5832 putc ('+', file);
5833 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5836 else if (offset)
5837 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5838 else
5839 putc ('0', file);
5841 if (index)
5843 putc ('+', file);
5844 PRINT_REG (index, 0, file);
5845 if (scale != 1)
5846 fprintf (file, "*%d", scale);
5848 putc (']', file);
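/* Editorial example, not part of the original file: a full
   base+index*scale+disp address prints as "12(%ebx,%ecx,4)" in the
   AT&T dialect and as "[ebx+ecx*4+12]" in the Intel dialect, following
   the two branches above.  */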
5853 /* Split one or more DImode RTL references into pairs of SImode
5854 references. The RTL can be REG, offsettable MEM, integer constant, or
5855 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
5856 split and "num" is its length. lo_half and hi_half are output arrays
5857 that parallel "operands". */
5859 void
5860 split_di (operands, num, lo_half, hi_half)
5861 rtx operands[];
5862 int num;
5863 rtx lo_half[], hi_half[];
5865 while (num--)
5867 rtx op = operands[num];
5868 if (CONSTANT_P (op))
5869 split_double (op, &lo_half[num], &hi_half[num]);
5870 else if (! reload_completed)
5872 lo_half[num] = gen_lowpart (SImode, op);
5873 hi_half[num] = gen_highpart (SImode, op);
5875 else if (GET_CODE (op) == REG)
5877 if (TARGET_64BIT)
5878 abort ();
5879 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
5880 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
5882 else if (offsettable_memref_p (op))
5884 lo_half[num] = adjust_address (op, SImode, 0);
5885 hi_half[num] = adjust_address (op, SImode, 4);
5887 else
5888 abort ();
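/* Editorial sketch, not part of the original file: splitting a DImode
   hard register once reload_completed is set.  For (reg:DI 0), the
   REGNO/REGNO+1 rule above yields (reg:SI 0) and (reg:SI 1), i.e. the
   eax/edx halves on ia32.  */
#if 0
rtx ops[1], lo[1], hi[1];
ops[0] = gen_rtx_REG (DImode, 0);
split_di (ops, 1, lo, hi);
#endif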
5892 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
5893 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
5894 is the expression of the binary operation. The output may either be
5895 emitted here, or returned to the caller, like all output_* functions.
5897 There is no guarantee that the operands are the same mode, as they
5898 might be within FLOAT or FLOAT_EXTEND expressions. */
5900 #ifndef SYSV386_COMPAT
5901 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
5902 wants to fix the assemblers because that causes incompatibility
5903 with gcc. No-one wants to fix gcc because that causes
5904 incompatibility with assemblers... You can use the option of
5905 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
5906 #define SYSV386_COMPAT 1
5907 #endif
5909 const char *
5910 output_387_binary_op (insn, operands)
5911 rtx insn;
5912 rtx *operands;
5914 static char buf[30];
5915 const char *p;
5916 const char *ssep;
5917 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
5919 #ifdef ENABLE_CHECKING
5920 /* Even if we do not want to check the inputs, this documents the input
5921 constraints, which helps in understanding the following code. */
5922 if (STACK_REG_P (operands[0])
5923 && ((REG_P (operands[1])
5924 && REGNO (operands[0]) == REGNO (operands[1])
5925 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
5926 || (REG_P (operands[2])
5927 && REGNO (operands[0]) == REGNO (operands[2])
5928 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
5929 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
5930 ; /* ok */
5931 else if (!is_sse)
5932 abort ();
5933 #endif
5935 switch (GET_CODE (operands[3]))
5937 case PLUS:
5938 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
5939 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
5940 p = "fiadd";
5941 else
5942 p = "fadd";
5943 ssep = "add";
5944 break;
5946 case MINUS:
5947 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
5948 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
5949 p = "fisub";
5950 else
5951 p = "fsub";
5952 ssep = "sub";
5953 break;
5955 case MULT:
5956 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
5957 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
5958 p = "fimul";
5959 else
5960 p = "fmul";
5961 ssep = "mul";
5962 break;
5964 case DIV:
5965 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
5966 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
5967 p = "fidiv";
5968 else
5969 p = "fdiv";
5970 ssep = "div";
5971 break;
5973 default:
5974 abort ();
5977 if (is_sse)
5979 strcpy (buf, ssep);
5980 if (GET_MODE (operands[0]) == SFmode)
5981 strcat (buf, "ss\t{%2, %0|%0, %2}");
5982 else
5983 strcat (buf, "sd\t{%2, %0|%0, %2}");
5984 return buf;
5986 strcpy (buf, p);
5988 switch (GET_CODE (operands[3]))
5990 case MULT:
5991 case PLUS:
5992 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
5994 rtx temp = operands[2];
5995 operands[2] = operands[1];
5996 operands[1] = temp;
5999 /* We know operands[0] == operands[1]. */
6001 if (GET_CODE (operands[2]) == MEM)
6003 p = "%z2\t%2";
6004 break;
6007 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6009 if (STACK_TOP_P (operands[0]))
6010 /* How is it that we are storing to a dead operand[2]?
6011 Well, presumably operands[1] is dead too. We can't
6012 store the result to st(0) as st(0) gets popped on this
6013 instruction. Instead store to operands[2] (which I
6014 think has to be st(1)). st(1) will be popped later.
6015 gcc <= 2.8.1 didn't have this check and generated
6016 assembly code that the Unixware assembler rejected. */
6017 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6018 else
6019 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6020 break;
6023 if (STACK_TOP_P (operands[0]))
6024 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6025 else
6026 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6027 break;
6029 case MINUS:
6030 case DIV:
6031 if (GET_CODE (operands[1]) == MEM)
6033 p = "r%z1\t%1";
6034 break;
6037 if (GET_CODE (operands[2]) == MEM)
6039 p = "%z2\t%2";
6040 break;
6043 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6045 #if SYSV386_COMPAT
6046 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6047 derived assemblers, confusingly reverse the direction of
6048 the operation for fsub{r} and fdiv{r} when the
6049 destination register is not st(0). The Intel assembler
6050 doesn't have this brain damage. Read !SYSV386_COMPAT to
6051 figure out what the hardware really does. */
6052 if (STACK_TOP_P (operands[0]))
6053 p = "{p\t%0, %2|rp\t%2, %0}";
6054 else
6055 p = "{rp\t%2, %0|p\t%0, %2}";
6056 #else
6057 if (STACK_TOP_P (operands[0]))
6058 /* As above for fmul/fadd, we can't store to st(0). */
6059 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6060 else
6061 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6062 #endif
6063 break;
6066 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6068 #if SYSV386_COMPAT
6069 if (STACK_TOP_P (operands[0]))
6070 p = "{rp\t%0, %1|p\t%1, %0}";
6071 else
6072 p = "{p\t%1, %0|rp\t%0, %1}";
6073 #else
6074 if (STACK_TOP_P (operands[0]))
6075 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6076 else
6077 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6078 #endif
6079 break;
6082 if (STACK_TOP_P (operands[0]))
6084 if (STACK_TOP_P (operands[1]))
6085 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6086 else
6087 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
6088 break;
6090 else if (STACK_TOP_P (operands[1]))
6092 #if SYSV386_COMPAT
6093 p = "{\t%1, %0|r\t%0, %1}";
6094 #else
6095 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
6096 #endif
6098 else
6100 #if SYSV386_COMPAT
6101 p = "{r\t%2, %0|\t%0, %2}";
6102 #else
6103 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6104 #endif
6106 break;
6108 default:
6109 abort ();
6112 strcat (buf, p);
6113 return buf;
6116 /* Output code to initialize the control word copies used by the
6117 trunc?f?i patterns. NORMAL is set to the current control word, while
6118 ROUND_DOWN is set to a control word that rounds toward zero (truncation). */
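/* Editorial note: ORing in 0xc00 sets bits 10-11 of the i387 control
   word -- the rounding-control field -- to 11b, i.e. round toward zero,
   which is what the trunc?f?i patterns need.  The movsi_insv_1 path
   below appears to write 0x0c into the byte holding that field instead,
   and is used only where partial-register writes are cheap.  */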
6119 void
6120 emit_i387_cw_initialization (normal, round_down)
6121 rtx normal, round_down;
6123 rtx reg = gen_reg_rtx (HImode);
6125 emit_insn (gen_x86_fnstcw_1 (normal));
6126 emit_move_insn (reg, normal);
6127 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6128 && !TARGET_64BIT)
6129 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6130 else
6131 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
6132 emit_move_insn (round_down, reg);
6135 /* Output code for INSN to convert a float to a signed int. OPERANDS
6136 are the insn operands. The output may be [HSD]Imode and the input
6137 operand may be [SDX]Fmode. */
6139 const char *
6140 output_fix_trunc (insn, operands)
6141 rtx insn;
6142 rtx *operands;
6144 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6145 int dimode_p = GET_MODE (operands[0]) == DImode;
6147 /* Jump through a hoop or two for DImode, since the hardware has no
6148 non-popping instruction. We used to do this a different way, but
6149 that was somewhat fragile and broke with post-reload splitters. */
6150 if (dimode_p && !stack_top_dies)
6151 output_asm_insn ("fld\t%y1", operands);
6153 if (!STACK_TOP_P (operands[1]))
6154 abort ();
6156 if (GET_CODE (operands[0]) != MEM)
6157 abort ();
6159 output_asm_insn ("fldcw\t%3", operands);
6160 if (stack_top_dies || dimode_p)
6161 output_asm_insn ("fistp%z0\t%0", operands);
6162 else
6163 output_asm_insn ("fist%z0\t%0", operands);
6164 output_asm_insn ("fldcw\t%2", operands);
6166 return "";
6169 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6170 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6171 when fucom should be used. */
6173 const char *
6174 output_fp_compare (insn, operands, eflags_p, unordered_p)
6175 rtx insn;
6176 rtx *operands;
6177 int eflags_p, unordered_p;
6179 int stack_top_dies;
6180 rtx cmp_op0 = operands[0];
6181 rtx cmp_op1 = operands[1];
6182 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
6184 if (eflags_p == 2)
6186 cmp_op0 = cmp_op1;
6187 cmp_op1 = operands[2];
6189 if (is_sse)
6191 if (GET_MODE (operands[0]) == SFmode)
6192 if (unordered_p)
6193 return "ucomiss\t{%1, %0|%0, %1}";
6194 else
6195 return "comiss\t{%1, %0|%0, %1}";
6196 else
6197 if (unordered_p)
6198 return "ucomisd\t{%1, %0|%0, %1}";
6199 else
6200 return "comisd\t{%1, %0|%0, %1}";
6203 if (! STACK_TOP_P (cmp_op0))
6204 abort ();
6206 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6208 if (STACK_REG_P (cmp_op1)
6209 && stack_top_dies
6210 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6211 && REGNO (cmp_op1) != FIRST_STACK_REG)
6213 /* If the top of the 387 stack dies, and the other operand
6214 is also a stack register that dies, then this must be a
6215 `fcompp' float compare. */
6217 if (eflags_p == 1)
6219 /* There is no double popping fcomi variant. Fortunately,
6220 eflags is immune from the fstp's cc clobbering. */
6221 if (unordered_p)
6222 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6223 else
6224 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6225 return "fstp\t%y0";
6227 else
6229 if (eflags_p == 2)
6231 if (unordered_p)
6232 return "fucompp\n\tfnstsw\t%0";
6233 else
6234 return "fcompp\n\tfnstsw\t%0";
6236 else
6238 if (unordered_p)
6239 return "fucompp";
6240 else
6241 return "fcompp";
6245 else
6247 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
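/* Editorial example: an ordered fcomi compare (eflags_p == 1) of FP
   operands whose stack top dies gives mask = (1 << 3) | 1 == 9, and
   alt[9] is "fcomip\t{%y1, %0|%0, %y1}".  */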
6249 static const char * const alt[24] =
6251 "fcom%z1\t%y1",
6252 "fcomp%z1\t%y1",
6253 "fucom%z1\t%y1",
6254 "fucomp%z1\t%y1",
6256 "ficom%z1\t%y1",
6257 "ficomp%z1\t%y1",
6258 NULL,
6259 NULL,
6261 "fcomi\t{%y1, %0|%0, %y1}",
6262 "fcomip\t{%y1, %0|%0, %y1}",
6263 "fucomi\t{%y1, %0|%0, %y1}",
6264 "fucomip\t{%y1, %0|%0, %y1}",
6266 NULL,
6267 NULL,
6268 NULL,
6269 NULL,
6271 "fcom%z2\t%y2\n\tfnstsw\t%0",
6272 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6273 "fucom%z2\t%y2\n\tfnstsw\t%0",
6274 "fucomp%z2\t%y2\n\tfnstsw\t%0",
6276 "ficom%z2\t%y2\n\tfnstsw\t%0",
6277 "ficomp%z2\t%y2\n\tfnstsw\t%0",
6278 NULL,
6279 NULL
6282 int mask;
6283 const char *ret;
6285 mask = eflags_p << 3;
6286 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6287 mask |= unordered_p << 1;
6288 mask |= stack_top_dies;
6290 if (mask >= 24)
6291 abort ();
6292 ret = alt[mask];
6293 if (ret == NULL)
6294 abort ();
6296 return ret;
6300 /* Output assembler code to FILE to initialize basic-block profiling.
6302 If profile_block_flag == 2
6304 Output code to call the subroutine `__bb_init_trace_func'
6305 and pass two parameters to it. The first parameter is
6306 the address of a block allocated in the object module.
6307 The second parameter is the number of the first basic block
6308 of the function.
6310 The name of the block is a local symbol made with this statement:
6312 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
6314 Of course, since you are writing the definition of
6315 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
6316 can take a short cut in the definition of this macro and use the
6317 name that you know will result.
6319 The number of the first basic block of the function is
6320 passed to the macro in BLOCK_OR_LABEL.
6322 If described in a virtual assembler language the code to be
6323 output looks like:
6325 parameter1 <- LPBX0
6326 parameter2 <- BLOCK_OR_LABEL
6327 call __bb_init_trace_func
6329 else if profile_block_flag != 0
6331 Output code to call the subroutine `__bb_init_func'
6332 and pass one single parameter to it, which is the same
6333 as the first parameter to `__bb_init_trace_func'.
6335 The first word of this parameter is a flag which will be nonzero if
6336 the object module has already been initialized. So test this word
6337 first, and do not call `__bb_init_func' if the flag is nonzero.
6338 Note: When profile_block_flag == 2 the test need not be done
6339 but `__bb_init_trace_func' *must* be called.
6341 BLOCK_OR_LABEL may be used to generate a label number as a
6342 branch destination in case `__bb_init_func' will not be called.
6344 If described in a virtual assembler language the code to be
6345 output looks like:
6347 cmp (LPBX0),0
6348 jne local_label
6349 parameter1 <- LPBX0
6350 call __bb_init_func
6351 local_label:
6354 void
6355 ix86_output_function_block_profiler (file, block_or_label)
6356 FILE *file;
6357 int block_or_label;
6359 static int num_func = 0;
6360 rtx xops[8];
6361 char block_table[80], false_label[80];
6363 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
6365 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
6366 xops[5] = stack_pointer_rtx;
6367 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
6369 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
6371 switch (profile_block_flag)
6373 case 2:
6374 xops[2] = GEN_INT (block_or_label);
6375 xops[3] = gen_rtx_MEM (Pmode,
6376 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
6377 xops[6] = GEN_INT (8);
6379 output_asm_insn ("push{l}\t%2", xops);
6380 if (!flag_pic)
6381 output_asm_insn ("push{l}\t%1", xops);
6382 else
6384 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
6385 output_asm_insn ("push{l}\t%7", xops);
6387 output_asm_insn ("call\t%P3", xops);
6388 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
6389 break;
6391 default:
6392 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
6394 xops[0] = const0_rtx;
6395 xops[2] = gen_rtx_MEM (Pmode,
6396 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
6397 xops[3] = gen_rtx_MEM (Pmode,
6398 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
6399 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
6400 xops[6] = GEN_INT (4);
6402 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
6404 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
6405 output_asm_insn ("jne\t%2", xops);
6407 if (!flag_pic)
6408 output_asm_insn ("push{l}\t%1", xops);
6409 else
6411 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
6412 output_asm_insn ("push{l}\t%7", xops);
6414 output_asm_insn ("call\t%P3", xops);
6415 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
6416 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
6417 num_func++;
6418 break;
6422 /* Output assembler code to FILE to increment a counter associated
6423 with basic block number BLOCKNO.
6425 If profile_block_flag == 2
6427 Output code to initialize the global structure `__bb' and
6428 call the function `__bb_trace_func' which will increment the
6429 counter.
6431 `__bb' consists of two words. In the first word the number
6432 of the basic block has to be stored. In the second word
6433 the address of a block allocated in the object module
6434 has to be stored.
6436 The basic block number is given by BLOCKNO.
6438 The address of the block is given by the label created with
6440 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
6442 by FUNCTION_BLOCK_PROFILER.
6444 Of course, since you are writing the definition of
6445 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
6446 can take a short cut in the definition of this macro and use the
6447 name that you know will result.
6449 If described in a virtual assembler language the code to be
6450 output looks like:
6452 move BLOCKNO -> (__bb)
6453 move LPBX0 -> (__bb+4)
6454 call __bb_trace_func
6456 Note that function `__bb_trace_func' must not change the
6457 machine state, especially the flag register. To grant
6458 this, you must output code to save and restore registers
6459 either in this macro or in the macros MACHINE_STATE_SAVE
6460 and MACHINE_STATE_RESTORE. The last two macros will be
6461 used in the function `__bb_trace_func', so you must make
6462 sure that the function prologue does not change any
6463 register prior to saving it with MACHINE_STATE_SAVE.
6465 else if profile_block_flag != 0
6467 Output code to increment the counter directly.
6468 Basic blocks are numbered separately from zero within each
6469 compiled object module. The count associated with block number
6470 BLOCKNO is at index BLOCKNO in an array of words; the name of
6471 this array is a local symbol made with this statement:
6473 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
6475 Of course, since you are writing the definition of
6476 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
6477 can take a short cut in the definition of this macro and use the
6478 name that you know will result.
6480 If described in a virtual assembler language the code to be
6481 output looks like:
6483 inc (LPBX2+4*BLOCKNO)
6486 void
6487 ix86_output_block_profiler (file, blockno)
6488 FILE *file ATTRIBUTE_UNUSED;
6489 int blockno;
6491 rtx xops[8], cnt_rtx;
6492 char counts[80];
6493 char *block_table = counts;
6495 switch (profile_block_flag)
6497 case 2:
6498 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
6500 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
6501 xops[2] = GEN_INT (blockno);
6502 xops[3] = gen_rtx_MEM (Pmode,
6503 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
6504 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
6505 xops[5] = plus_constant (xops[4], 4);
6506 xops[0] = gen_rtx_MEM (SImode, xops[4]);
6507 xops[6] = gen_rtx_MEM (SImode, xops[5]);
6509 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
6511 output_asm_insn ("pushf", xops);
6512 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6513 if (flag_pic)
6515 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
6516 output_asm_insn ("push{l}\t%7", xops);
6517 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
6518 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
6519 output_asm_insn ("pop{l}\t%7", xops);
6521 else
6522 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
6523 output_asm_insn ("call\t%P3", xops);
6524 output_asm_insn ("popf", xops);
6526 break;
6528 default:
6529 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
6530 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
6531 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
6533 if (blockno)
6534 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
6536 if (flag_pic)
6537 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
6539 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
6540 output_asm_insn ("inc{l}\t%0", xops);
6542 break;
6546 void
6547 ix86_expand_move (mode, operands)
6548 enum machine_mode mode;
6549 rtx operands[];
6551 int strict = (reload_in_progress || reload_completed);
6552 rtx insn;
6554 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
6556 /* Emit insns to move operands[1] into operands[0]. */
6558 if (GET_CODE (operands[0]) == MEM)
6559 operands[1] = force_reg (Pmode, operands[1]);
6560 else
6562 rtx temp = operands[0];
6563 if (GET_CODE (temp) != REG)
6564 temp = gen_reg_rtx (Pmode);
6565 temp = legitimize_pic_address (operands[1], temp);
6566 if (temp == operands[0])
6567 return;
6568 operands[1] = temp;
6571 else
6573 if (GET_CODE (operands[0]) == MEM
6574 && (GET_MODE (operands[0]) == QImode
6575 || !push_operand (operands[0], mode))
6576 && GET_CODE (operands[1]) == MEM)
6577 operands[1] = force_reg (mode, operands[1]);
6579 if (push_operand (operands[0], mode)
6580 && ! general_no_elim_operand (operands[1], mode))
6581 operands[1] = copy_to_mode_reg (mode, operands[1]);
6583 if (FLOAT_MODE_P (mode))
6585 /* If we are loading a floating point constant to a register,
6586 force the value to memory now, since we'll get better code
6587 out the back end. */
6589 if (strict)
6591 else if (GET_CODE (operands[1]) == CONST_DOUBLE
6592 && register_operand (operands[0], mode))
6593 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
6597 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
6599 emit_insn (insn);
6602 /* Attempt to expand a binary operator. Make the expansion closer to the
6603 actual machine than just general_operand, which would allow 3 separate
6604 memory references (one output, two input) in a single insn. */
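/* Editorial example of why: general_operand would accept
   (set (mem:SI A) (plus:SI (mem:SI B) (mem:SI C))), but no ia32 ALU
   instruction takes two memory operands -- there is no
   "addl (%ecx), (%eax)" -- so at most one source may stay in memory,
   and a memory destination must match a source.  */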
6606 void
6607 ix86_expand_binary_operator (code, mode, operands)
6608 enum rtx_code code;
6609 enum machine_mode mode;
6610 rtx operands[];
6612 int matching_memory;
6613 rtx src1, src2, dst, op, clob;
6615 dst = operands[0];
6616 src1 = operands[1];
6617 src2 = operands[2];
6619 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6620 if (GET_RTX_CLASS (code) == 'c'
6621 && (rtx_equal_p (dst, src2)
6622 || immediate_operand (src1, mode)))
6624 rtx temp = src1;
6625 src1 = src2;
6626 src2 = temp;
6629 /* If the destination is memory, and we do not have matching source
6630 operands, do things in registers. */
6631 matching_memory = 0;
6632 if (GET_CODE (dst) == MEM)
6634 if (rtx_equal_p (dst, src1))
6635 matching_memory = 1;
6636 else if (GET_RTX_CLASS (code) == 'c'
6637 && rtx_equal_p (dst, src2))
6638 matching_memory = 2;
6639 else
6640 dst = gen_reg_rtx (mode);
6643 /* Both source operands cannot be in memory. */
6644 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
6646 if (matching_memory != 2)
6647 src2 = force_reg (mode, src2);
6648 else
6649 src1 = force_reg (mode, src1);
6652 /* If the operation is not commutative, source 1 cannot be a constant
6653 or non-matching memory. */
6654 if ((CONSTANT_P (src1)
6655 || (!matching_memory && GET_CODE (src1) == MEM))
6656 && GET_RTX_CLASS (code) != 'c')
6657 src1 = force_reg (mode, src1);
6659 /* If optimizing, copy to regs to improve CSE */
6660 if (optimize && ! no_new_pseudos)
6662 if (GET_CODE (dst) == MEM)
6663 dst = gen_reg_rtx (mode);
6664 if (GET_CODE (src1) == MEM)
6665 src1 = force_reg (mode, src1);
6666 if (GET_CODE (src2) == MEM)
6667 src2 = force_reg (mode, src2);
6670 /* Emit the instruction. */
6672 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
6673 if (reload_in_progress)
6675 /* Reload doesn't know about the flags register, and doesn't know that
6676 it doesn't want to clobber it. We can only do this with PLUS. */
6677 if (code != PLUS)
6678 abort ();
6679 emit_insn (op);
6681 else
6683 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6684 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6687 /* Fix up the destination if needed. */
6688 if (dst != operands[0])
6689 emit_move_insn (operands[0], dst);
6692 /* Return TRUE or FALSE depending on whether the binary operator meets the
6693 appropriate constraints. */
6695 int
6696 ix86_binary_operator_ok (code, mode, operands)
6697 enum rtx_code code;
6698 enum machine_mode mode ATTRIBUTE_UNUSED;
6699 rtx operands[3];
6701 /* Both source operands cannot be in memory. */
6702 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
6703 return 0;
6704 /* If the operation is not commutative, source 1 cannot be a constant. */
6705 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
6706 return 0;
6707 /* If the destination is memory, we must have a matching source operand. */
6708 if (GET_CODE (operands[0]) == MEM
6709 && ! (rtx_equal_p (operands[0], operands[1])
6710 || (GET_RTX_CLASS (code) == 'c'
6711 && rtx_equal_p (operands[0], operands[2]))))
6712 return 0;
6713 /* If the operation is not commutative and source 1 is memory, we must
6714 have a matching destination. */
6715 if (GET_CODE (operands[1]) == MEM
6716 && GET_RTX_CLASS (code) != 'c'
6717 && ! rtx_equal_p (operands[0], operands[1]))
6718 return 0;
6719 return 1;
6722 /* Attempt to expand a unary operator. Make the expansion closer to the
6723 actual machine than just general_operand, which would allow 2 separate
6724 memory references (one output, one input) in a single insn. */
6726 void
6727 ix86_expand_unary_operator (code, mode, operands)
6728 enum rtx_code code;
6729 enum machine_mode mode;
6730 rtx operands[];
6732 int matching_memory;
6733 rtx src, dst, op, clob;
6735 dst = operands[0];
6736 src = operands[1];
6738 /* If the destination is memory, and we do not have matching source
6739 operands, do things in registers. */
6740 matching_memory = 0;
6741 if (GET_CODE (dst) == MEM)
6743 if (rtx_equal_p (dst, src))
6744 matching_memory = 1;
6745 else
6746 dst = gen_reg_rtx (mode);
6749 /* When source operand is memory, destination must match. */
6750 if (!matching_memory && GET_CODE (src) == MEM)
6751 src = force_reg (mode, src);
6753 /* If optimizing, copy to regs to improve CSE */
6754 if (optimize && ! no_new_pseudos)
6756 if (GET_CODE (dst) == MEM)
6757 dst = gen_reg_rtx (mode);
6758 if (GET_CODE (src) == MEM)
6759 src = force_reg (mode, src);
6762 /* Emit the instruction. */
6764 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
6765 if (reload_in_progress || code == NOT)
6767 /* Reload doesn't know about the flags register, and doesn't know that
6768 it doesn't want to clobber it. */
6769 if (code != NOT)
6770 abort ();
6771 emit_insn (op);
6773 else
6775 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6776 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6779 /* Fix up the destination if needed. */
6780 if (dst != operands[0])
6781 emit_move_insn (operands[0], dst);
6784 /* Return TRUE or FALSE depending on whether the unary operator meets the
6785 appropriate constraints. */
6787 int
6788 ix86_unary_operator_ok (code, mode, operands)
6789 enum rtx_code code ATTRIBUTE_UNUSED;
6790 enum machine_mode mode ATTRIBUTE_UNUSED;
6791 rtx operands[2] ATTRIBUTE_UNUSED;
6793 /* If one of operands is memory, source and destination must match. */
6794 if ((GET_CODE (operands[0]) == MEM
6795 || GET_CODE (operands[1]) == MEM)
6796 && ! rtx_equal_p (operands[0], operands[1]))
6797 return FALSE;
6798 return TRUE;
6801 /* Return TRUE or FALSE depending on whether the first SET in INSN
6802 has source and destination with matching CC modes, and that the
6803 CC mode is at least as constrained as REQ_MODE. */
6805 int
6806 ix86_match_ccmode (insn, req_mode)
6807 rtx insn;
6808 enum machine_mode req_mode;
6810 rtx set;
6811 enum machine_mode set_mode;
6813 set = PATTERN (insn);
6814 if (GET_CODE (set) == PARALLEL)
6815 set = XVECEXP (set, 0, 0);
6816 if (GET_CODE (set) != SET)
6817 abort ();
6818 if (GET_CODE (SET_SRC (set)) != COMPARE)
6819 abort ();
6821 set_mode = GET_MODE (SET_DEST (set));
6822 switch (set_mode)
6824 case CCNOmode:
6825 if (req_mode != CCNOmode
6826 && (req_mode != CCmode
6827 || XEXP (SET_SRC (set), 1) != const0_rtx))
6828 return 0;
6829 break;
6830 case CCmode:
6831 if (req_mode == CCGCmode)
6832 return 0;
6833 /* FALLTHRU */
6834 case CCGCmode:
6835 if (req_mode == CCGOCmode || req_mode == CCNOmode)
6836 return 0;
6837 /* FALLTHRU */
6838 case CCGOCmode:
6839 if (req_mode == CCZmode)
6840 return 0;
6841 /* FALLTHRU */
6842 case CCZmode:
6843 break;
6845 default:
6846 abort ();
6849 return (GET_MODE (SET_SRC (set)) == set_mode);
6852 /* Generate insn patterns to do an integer compare of OPERANDS. */
6854 static rtx
6855 ix86_expand_int_compare (code, op0, op1)
6856 enum rtx_code code;
6857 rtx op0, op1;
6859 enum machine_mode cmpmode;
6860 rtx tmp, flags;
6862 cmpmode = SELECT_CC_MODE (code, op0, op1);
6863 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
6865 /* This is very simple, but making the interface the same as in the
6866 FP case makes the rest of the code easier. */
6867 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
6868 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
6870 /* Return the test that should be put into the flags user, i.e.
6871 the bcc, scc, or cmov instruction. */
6872 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
6875 /* Figure out whether to use ordered or unordered fp comparisons.
6876 Return the appropriate mode to use. */
6878 enum machine_mode
6879 ix86_fp_compare_mode (code)
6880 enum rtx_code code ATTRIBUTE_UNUSED;
6882 /* ??? In order to make all comparisons reversible, we do all comparisons
6883 non-trapping when compiling for IEEE. Once gcc is able to distinguish
6884 all forms of trapping and nontrapping comparisons, we can make inequality
6885 comparisons trapping again, since it results in better code when using
6886 FCOM based compares. */
6887 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
6890 enum machine_mode
6891 ix86_cc_mode (code, op0, op1)
6892 enum rtx_code code;
6893 rtx op0, op1;
6895 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
6896 return ix86_fp_compare_mode (code);
6897 switch (code)
6899 /* Only zero flag is needed. */
6900 case EQ: /* ZF=0 */
6901 case NE: /* ZF!=0 */
6902 return CCZmode;
6903 /* Codes needing carry flag. */
6904 case GEU: /* CF=0 */
6905 case GTU: /* CF=0 & ZF=0 */
6906 case LTU: /* CF=1 */
6907 case LEU: /* CF=1 | ZF=1 */
6908 return CCmode;
6909 /* Codes possibly doable only with sign flag when
6910 comparing against zero. */
6911 case GE: /* SF=OF or SF=0 */
6912 case LT: /* SF<>OF or SF=1 */
6913 if (op1 == const0_rtx)
6914 return CCGOCmode;
6915 else
6916 /* For other cases Carry flag is not required. */
6917 return CCGCmode;
6918 /* Codes doable only with the sign flag when comparing
6919 against zero, but we lack a jump instruction for that,
6920 so we need to use relational tests against overflow,
6921 which thus needs to be zero. */
6922 case GT: /* ZF=0 & SF=OF */
6923 case LE: /* ZF=1 | SF<>OF */
6924 if (op1 == const0_rtx)
6925 return CCNOmode;
6926 else
6927 return CCGCmode;
6928 default:
6929 abort ();
6933 /* Return true if we should use an FCOMI instruction for this fp comparison. */
6935 int
6936 ix86_use_fcomi_compare (code)
6937 enum rtx_code code ATTRIBUTE_UNUSED;
6939 enum rtx_code swapped_code = swap_condition (code);
6940 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
6941 || (ix86_fp_comparison_cost (swapped_code)
6942 == ix86_fp_comparison_fcomi_cost (swapped_code)));
6945 /* Swap, force into registers, or otherwise massage the two operands
6946 to an fp comparison. The operands are updated in place; the new
6947 comparison code is returned. */
6949 static enum rtx_code
6950 ix86_prepare_fp_compare_args (code, pop0, pop1)
6951 enum rtx_code code;
6952 rtx *pop0, *pop1;
6954 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
6955 rtx op0 = *pop0, op1 = *pop1;
6956 enum machine_mode op_mode = GET_MODE (op0);
6957 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
6959 /* All of the unordered compare instructions only work on registers.
6960 The same is true of the XFmode compare instructions and of the
6961 fcomi compare instructions. */
6963 if (!is_sse
6964 && (fpcmp_mode == CCFPUmode
6965 || op_mode == XFmode
6966 || op_mode == TFmode
6967 || ix86_use_fcomi_compare (code)))
6969 op0 = force_reg (op_mode, op0);
6970 op1 = force_reg (op_mode, op1);
6972 else
6974 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
6975 things around if they appear profitable, otherwise force op0
6976 into a register. */
6978 if (standard_80387_constant_p (op0) == 0
6979 || (GET_CODE (op0) == MEM
6980 && ! (standard_80387_constant_p (op1) == 0
6981 || GET_CODE (op1) == MEM)))
6983 rtx tmp;
6984 tmp = op0, op0 = op1, op1 = tmp;
6985 code = swap_condition (code);
6988 if (GET_CODE (op0) != REG)
6989 op0 = force_reg (op_mode, op0);
6991 if (CONSTANT_P (op1))
6993 if (standard_80387_constant_p (op1))
6994 op1 = force_reg (op_mode, op1);
6995 else
6996 op1 = validize_mem (force_const_mem (op_mode, op1));
7000 /* Try to rearrange the comparison to make it cheaper. */
7001 if (ix86_fp_comparison_cost (code)
7002 > ix86_fp_comparison_cost (swap_condition (code))
7003 && (GET_CODE (op0) == REG || !reload_completed))
7005 rtx tmp;
7006 tmp = op0, op0 = op1, op1 = tmp;
7007 code = swap_condition (code);
7008 if (GET_CODE (op0) != REG)
7009 op0 = force_reg (op_mode, op0);
7012 *pop0 = op0;
7013 *pop1 = op1;
7014 return code;
7017 /* Convert the comparison codes we use to represent FP comparisons to the
7018 integer code that will result in a proper branch. Return UNKNOWN if no
7019 such code is available. */
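/* Editorial note: after fcomi (or fnstsw + sahf) the FP outcome lands
   in CF/ZF exactly as an unsigned integer compare would leave it -- see
   the flag table in ix86_fp_comparison_codes below -- hence GT maps to
   GTU, GE to GEU, and so on.  */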
7020 static enum rtx_code
7021 ix86_fp_compare_code_to_integer (code)
7022 enum rtx_code code;
7024 switch (code)
7026 case GT:
7027 return GTU;
7028 case GE:
7029 return GEU;
7030 case ORDERED:
7031 case UNORDERED:
7032 return code;
7033 break;
7034 case UNEQ:
7035 return EQ;
7036 break;
7037 case UNLT:
7038 return LTU;
7039 break;
7040 case UNLE:
7041 return LEU;
7042 break;
7043 case LTGT:
7044 return NE;
7045 break;
7046 default:
7047 return UNKNOWN;
7051 /* Split comparison code CODE into comparisons we can do using branch
7052 instructions. BYPASS_CODE is the comparison code for a branch that will
7053 branch around FIRST_CODE and SECOND_CODE. If one of the branches
7054 is not required, its code is set to NIL.
7055 We never require more than two branches. */
7056 static void
7057 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7058 enum rtx_code code, *bypass_code, *first_code, *second_code;
7060 *first_code = code;
7061 *bypass_code = NIL;
7062 *second_code = NIL;
7064 /* The fcomi comparison sets flags as follows:
7066 cmp ZF PF CF
7067 > 0 0 0
7068 < 0 0 1
7069 = 1 0 0
7070 un 1 1 1 */
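/* Editorial example: a plain LT test checks CF=1, but the table shows
   CF=1 for the unordered result too, so under IEEE math LT becomes
   UNLT guarded by an UNORDERED (PF=1) bypass branch; NE instead needs
   a second UNORDERED branch, because its ZF=0 test misses the
   unordered case (where ZF=1).  */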
7072 switch (code)
7074 case GT: /* GTU - CF=0 & ZF=0 */
7075 case GE: /* GEU - CF=0 */
7076 case ORDERED: /* PF=0 */
7077 case UNORDERED: /* PF=1 */
7078 case UNEQ: /* EQ - ZF=1 */
7079 case UNLT: /* LTU - CF=1 */
7080 case UNLE: /* LEU - CF=1 | ZF=1 */
7081 case LTGT: /* EQ - ZF=0 */
7082 break;
7083 case LT: /* LTU - CF=1 - fails on unordered */
7084 *first_code = UNLT;
7085 *bypass_code = UNORDERED;
7086 break;
7087 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7088 *first_code = UNLE;
7089 *bypass_code = UNORDERED;
7090 break;
7091 case EQ: /* EQ - ZF=1 - fails on unordered */
7092 *first_code = UNEQ;
7093 *bypass_code = UNORDERED;
7094 break;
7095 case NE: /* NE - ZF=0 - fails on unordered */
7096 *first_code = LTGT;
7097 *second_code = UNORDERED;
7098 break;
7099 case UNGE: /* GEU - CF=0 - fails on unordered */
7100 *first_code = GE;
7101 *second_code = UNORDERED;
7102 break;
7103 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7104 *first_code = GT;
7105 *second_code = UNORDERED;
7106 break;
7107 default:
7108 abort ();
7110 if (!TARGET_IEEE_FP)
7112 *second_code = NIL;
7113 *bypass_code = NIL;
7117 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
7118 All of the following functions use the number of instructions as the cost metric.
7119 In the future this should be tweaked to compute bytes for optimize_size and
7120 take into account the performance of various instructions on various CPUs. */
7121 static int
7122 ix86_fp_comparison_arithmetics_cost (code)
7123 enum rtx_code code;
7125 if (!TARGET_IEEE_FP)
7126 return 4;
7127 /* The cost of code output by ix86_expand_fp_compare. */
7128 switch (code)
7130 case UNLE:
7131 case UNLT:
7132 case LTGT:
7133 case GT:
7134 case GE:
7135 case UNORDERED:
7136 case ORDERED:
7137 case UNEQ:
7138 return 4;
7139 break;
7140 case LT:
7141 case NE:
7142 case EQ:
7143 case UNGE:
7144 return 5;
7145 break;
7146 case LE:
7147 case UNGT:
7148 return 6;
7149 break;
7150 default:
7151 abort ();
7155 /* Return cost of comparison done using fcomi operation.
7156 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7157 static int
7158 ix86_fp_comparison_fcomi_cost (code)
7159 enum rtx_code code;
7161 enum rtx_code bypass_code, first_code, second_code;
7162 /* Return an arbitrarily high cost when the instruction is not supported - this
7163 prevents gcc from using it. */
7164 if (!TARGET_CMOVE)
7165 return 1024;
7166 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7167 return (bypass_code != NIL || second_code != NIL) + 2;
7170 /* Return cost of comparison done using sahf operation.
7171 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7172 static int
7173 ix86_fp_comparison_sahf_cost (code)
7174 enum rtx_code code;
7176 enum rtx_code bypass_code, first_code, second_code;
7177 /* Return an arbitrarily high cost when the instruction is not preferred - this
7178 keeps gcc from using it. */
7179 if (!TARGET_USE_SAHF && !optimize_size)
7180 return 1024;
7181 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7182 return (bypass_code != NIL || second_code != NIL) + 3;
7185 /* Compute cost of the comparison done using any method.
7186 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7187 static int
7188 ix86_fp_comparison_cost (code)
7189 enum rtx_code code;
7191 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7192 int min;
7194 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7195 sahf_cost = ix86_fp_comparison_sahf_cost (code);
7197 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7198 if (min > sahf_cost)
7199 min = sahf_cost;
7200 if (min > fcomi_cost)
7201 min = fcomi_cost;
7202 return min;
7205 /* Generate insn patterns to do a floating point compare of OPERANDS. */
7207 static rtx
7208 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
7209 enum rtx_code code;
7210 rtx op0, op1, scratch;
7211 rtx *second_test;
7212 rtx *bypass_test;
7214 enum machine_mode fpcmp_mode, intcmp_mode;
7215 rtx tmp, tmp2;
7216 int cost = ix86_fp_comparison_cost (code);
7217 enum rtx_code bypass_code, first_code, second_code;
7219 fpcmp_mode = ix86_fp_compare_mode (code);
7220 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7222 if (second_test)
7223 *second_test = NULL_RTX;
7224 if (bypass_test)
7225 *bypass_test = NULL_RTX;
7227 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7229 /* Do fcomi/sahf based test when profitable. */
7230 if ((bypass_code == NIL || bypass_test)
7231 && (second_code == NIL || second_test)
7232 && ix86_fp_comparison_arithmetics_cost (code) > cost)
7234 if (TARGET_CMOVE)
7236 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7237 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
7238 tmp);
7239 emit_insn (tmp);
7241 else
7243 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7244 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7245 if (!scratch)
7246 scratch = gen_reg_rtx (HImode);
7247 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7248 emit_insn (gen_x86_sahf_1 (scratch));
7251 /* The FP codes work out to act like unsigned. */
7252 intcmp_mode = fpcmp_mode;
7253 code = first_code;
7254 if (bypass_code != NIL)
7255 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7256 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7257 const0_rtx);
7258 if (second_code != NIL)
7259 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7260 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7261 const0_rtx);
7263 else
7265 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
7266 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7267 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7268 if (!scratch)
7269 scratch = gen_reg_rtx (HImode);
7270 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7272 /* In the unordered case, we have to check C2 for NaNs, which
7273 doesn't happen to work out to anything nice combination-wise.
7274 So do some bit twiddling on the value we've got in AH to come
7275 up with an appropriate set of condition codes. */
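/* Key to the masks below (editorial note): after fnstsw the i387
   condition codes land in AH as C0 = 0x01, C2 = 0x04 and C3 = 0x40;
   0x45 tests all three at once.  C3 indicates equality, C0 "below",
   and C2 the unordered (NaN) outcome.  */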
7277 intcmp_mode = CCNOmode;
7278 switch (code)
7280 case GT:
7281 case UNGT:
7282 if (code == GT || !TARGET_IEEE_FP)
7284 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7285 code = EQ;
7287 else
7289 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7290 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7291 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7292 intcmp_mode = CCmode;
7293 code = GEU;
7295 break;
7296 case LT:
7297 case UNLT:
7298 if (code == LT && TARGET_IEEE_FP)
7300 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7301 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
7302 intcmp_mode = CCmode;
7303 code = EQ;
7305 else
7307 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7308 code = NE;
7310 break;
7311 case GE:
7312 case UNGE:
7313 if (code == GE || !TARGET_IEEE_FP)
7315 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
7316 code = EQ;
7318 else
7320 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7321 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7322 GEN_INT (0x01)));
7323 code = NE;
7325 break;
7326 case LE:
7327 case UNLE:
7328 if (code == LE && TARGET_IEEE_FP)
7330 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7331 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7332 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7333 intcmp_mode = CCmode;
7334 code = LTU;
7336 else
7338 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7339 code = NE;
7341 break;
7342 case EQ:
7343 case UNEQ:
7344 if (code == EQ && TARGET_IEEE_FP)
7346 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7347 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7348 intcmp_mode = CCmode;
7349 code = EQ;
7351 else
7353 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7354 code = NE;
7355 break;
7357 break;
7358 case NE:
7359 case LTGT:
7360 if (code == NE && TARGET_IEEE_FP)
7362 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7363 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7364 GEN_INT (0x40)));
7365 code = NE;
7367 else
7369 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7370 code = EQ;
7372 break;
7374 case UNORDERED:
7375 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7376 code = NE;
7377 break;
7378 case ORDERED:
7379 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7380 code = EQ;
7381 break;
7383 default:
7384 abort ();
7388 /* Return the test that should be put into the flags user, i.e.
7389 the bcc, scc, or cmov instruction. */
7390 return gen_rtx_fmt_ee (code, VOIDmode,
7391 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7392 const0_rtx);
7395 rtx
7396 ix86_expand_compare (code, second_test, bypass_test)
7397 enum rtx_code code;
7398 rtx *second_test, *bypass_test;
7400 rtx op0, op1, ret;
7401 op0 = ix86_compare_op0;
7402 op1 = ix86_compare_op1;
7404 if (second_test)
7405 *second_test = NULL_RTX;
7406 if (bypass_test)
7407 *bypass_test = NULL_RTX;
7409 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7410 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
7411 second_test, bypass_test);
7412 else
7413 ret = ix86_expand_int_compare (code, op0, op1);
7415 return ret;
7418 /* Return true if CODE will result in a nontrivial jump sequence. */
7419 bool
7420 ix86_fp_jump_nontrivial_p (code)
7421 enum rtx_code code;
7423 enum rtx_code bypass_code, first_code, second_code;
7424 if (!TARGET_CMOVE)
7425 return true;
7426 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7427 return bypass_code != NIL || second_code != NIL;
7430 void
7431 ix86_expand_branch (code, label)
7432 enum rtx_code code;
7433 rtx label;
7435 rtx tmp;
7437 switch (GET_MODE (ix86_compare_op0))
7439 case QImode:
7440 case HImode:
7441 case SImode:
7442 simple:
7443 tmp = ix86_expand_compare (code, NULL, NULL);
7444 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7445 gen_rtx_LABEL_REF (VOIDmode, label),
7446 pc_rtx);
7447 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7448 return;
7450 case SFmode:
7451 case DFmode:
7452 case XFmode:
7453 case TFmode:
7455 rtvec vec;
7456 int use_fcomi;
7457 enum rtx_code bypass_code, first_code, second_code;
7459 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7460 &ix86_compare_op1);
7462 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7464 /* Check whether we will use the natural sequence with one jump. If
7465 so, we can expand the jump early. Otherwise delay expansion by
7466 creating a compound insn so as not to confuse the optimizers. */
7467 if (bypass_code == NIL && second_code == NIL
7468 && TARGET_CMOVE)
7470 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7471 gen_rtx_LABEL_REF (VOIDmode, label),
7472 pc_rtx, NULL_RTX);
7474 else
7476 tmp = gen_rtx_fmt_ee (code, VOIDmode,
7477 ix86_compare_op0, ix86_compare_op1);
7478 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7479 gen_rtx_LABEL_REF (VOIDmode, label),
7480 pc_rtx);
7481 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7483 use_fcomi = ix86_use_fcomi_compare (code);
7484 vec = rtvec_alloc (3 + !use_fcomi);
7485 RTVEC_ELT (vec, 0) = tmp;
7486 RTVEC_ELT (vec, 1)
7487 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7488 RTVEC_ELT (vec, 2)
7489 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7490 if (! use_fcomi)
7491 RTVEC_ELT (vec, 3)
7492 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7494 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7496 return;
7499 case DImode:
7500 if (TARGET_64BIT)
7501 goto simple;
7502 /* Expand DImode branch into multiple compare+branch. */
7504 rtx lo[2], hi[2], label2;
7505 enum rtx_code code1, code2, code3;
7507 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7509 tmp = ix86_compare_op0;
7510 ix86_compare_op0 = ix86_compare_op1;
7511 ix86_compare_op1 = tmp;
7512 code = swap_condition (code);
7514 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7515 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
7517 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7518 avoid two branches. This costs one extra insn, so disable when
7519 optimizing for size. */
7521 if ((code == EQ || code == NE)
7522 && (!optimize_size
7523 || hi[1] == const0_rtx || lo[1] == const0_rtx))
7525 rtx xor0, xor1;
7527 xor1 = hi[0];
7528 if (hi[1] != const0_rtx)
7529 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7530 NULL_RTX, 0, OPTAB_WIDEN);
7532 xor0 = lo[0];
7533 if (lo[1] != const0_rtx)
7534 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7535 NULL_RTX, 0, OPTAB_WIDEN);
7537 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7538 NULL_RTX, 0, OPTAB_WIDEN);
7540 ix86_compare_op0 = tmp;
7541 ix86_compare_op1 = const0_rtx;
7542 ix86_expand_branch (code, label);
7543 return;
7546 /* Otherwise, if we are doing less-than or greater-or-equal-than,
7547 op1 is a constant and the low word is zero, then we can just
7548 examine the high word. */
7550 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7551 switch (code)
7553 case LT: case LTU: case GE: case GEU:
7554 ix86_compare_op0 = hi[0];
7555 ix86_compare_op1 = hi[1];
7556 ix86_expand_branch (code, label);
7557 return;
7558 default:
7559 break;
7562 /* Otherwise, we need two or three jumps. */
7564 label2 = gen_label_rtx ();
7566 code1 = code;
7567 code2 = swap_condition (code);
7568 code3 = unsigned_condition (code);
7570 switch (code)
7572 case LT: case GT: case LTU: case GTU:
7573 break;
7575 case LE: code1 = LT; code2 = GT; break;
7576 case GE: code1 = GT; code2 = LT; break;
7577 case LEU: code1 = LTU; code2 = GTU; break;
7578 case GEU: code1 = GTU; code2 = LTU; break;
7580 case EQ: code1 = NIL; code2 = NE; break;
7581 case NE: code2 = NIL; break;
7583 default:
7584 abort ();
7588 * a < b =>
7589 * if (hi(a) < hi(b)) goto true;
7590 * if (hi(a) > hi(b)) goto false;
7591 * if (lo(a) < lo(b)) goto true;
7592 * false:
7595 ix86_compare_op0 = hi[0];
7596 ix86_compare_op1 = hi[1];
7598 if (code1 != NIL)
7599 ix86_expand_branch (code1, label);
7600 if (code2 != NIL)
7601 ix86_expand_branch (code2, label2);
7603 ix86_compare_op0 = lo[0];
7604 ix86_compare_op1 = lo[1];
7605 ix86_expand_branch (code3, label);
7607 if (code2 != NIL)
7608 emit_label (label2);
7609 return;
7612 default:
7613 abort ();
7617 /* Split branch based on floating point condition. */
7618 void
7619 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7620 enum rtx_code code;
7621 rtx op1, op2, target1, target2, tmp;
7623 rtx second, bypass;
7624 rtx label = NULL_RTX;
7625 rtx condition;
7626 int bypass_probability = -1, second_probability = -1, probability = -1;
7627 rtx i;
7629 if (target2 != pc_rtx)
7631 rtx tmp = target2;
7632 code = reverse_condition_maybe_unordered (code);
7633 target2 = target1;
7634 target1 = tmp;
7637 condition = ix86_expand_fp_compare (code, op1, op2,
7638 tmp, &second, &bypass);
7640 if (split_branch_probability >= 0)
7642 /* Distribute the probabilities across the jumps.
7643 Assume BYPASS and SECOND always test
7644 for UNORDERED. */
7645 probability = split_branch_probability;
7647 /* A value of 1 is low enough that the probability need not
7648 be updated. Later we may run some experiments and see
7649 if unordered values are more frequent in practice. */
7650 if (bypass)
7651 bypass_probability = 1;
7652 if (second)
7653 second_probability = 1;
7655 if (bypass != NULL_RTX)
7657 label = gen_label_rtx ();
7658 i = emit_jump_insn (gen_rtx_SET
7659 (VOIDmode, pc_rtx,
7660 gen_rtx_IF_THEN_ELSE (VOIDmode,
7661 bypass,
7662 gen_rtx_LABEL_REF (VOIDmode,
7663 label),
7664 pc_rtx)));
7665 if (bypass_probability >= 0)
7666 REG_NOTES (i)
7667 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7668 GEN_INT (bypass_probability),
7669 REG_NOTES (i));
7671 i = emit_jump_insn (gen_rtx_SET
7672 (VOIDmode, pc_rtx,
7673 gen_rtx_IF_THEN_ELSE (VOIDmode,
7674 condition, target1, target2)));
7675 if (probability >= 0)
7676 REG_NOTES (i)
7677 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7678 GEN_INT (probability),
7679 REG_NOTES (i));
7680 if (second != NULL_RTX)
7682 i = emit_jump_insn (gen_rtx_SET
7683 (VOIDmode, pc_rtx,
7684 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7685 target2)));
7686 if (second_probability >= 0)
7687 REG_NOTES (i)
7688 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7689 GEN_INT (second_probability),
7690 REG_NOTES (i));
7692 if (label != NULL_RTX)
7693 emit_label (label);
7696 int
7697 ix86_expand_setcc (code, dest)
7698 enum rtx_code code;
7699 rtx dest;
7701 rtx ret, tmp, tmpreg;
7702 rtx second_test, bypass_test;
7703 int type;
7705 if (GET_MODE (ix86_compare_op0) == DImode
7706 && !TARGET_64BIT)
7707 return 0; /* FAIL */
7709 /* Three modes of generation:
7710 0 -- destination does not overlap compare sources:
7711 clear dest first, emit strict_low_part setcc.
7712 1 -- destination does overlap compare sources:
7713 emit subreg setcc, zero extend.
7714 2 -- destination is in QImode:
7715 emit setcc only.
7717 We don't use mode 0 early in compilation because it confuses CSE.
7718 There are peepholes to turn mode 1 into mode 0 if things work out
7719 nicely after reload. */
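/* Roughly (editorial sketch), the three shapes are:
   mode 0: xorl %eax, %eax ; set<cc> %al (strict_low_part)
   mode 1: set<cc> %al ; movzbl %al, %eax
   mode 2: set<cc> %al  */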
7721 type = cse_not_expected ? 0 : 1;
7723 if (GET_MODE (dest) == QImode)
7724 type = 2;
7725 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
7726 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
7727 type = 1;
7729 if (type == 0)
7730 emit_move_insn (dest, const0_rtx);
7732 ret = ix86_expand_compare (code, &second_test, &bypass_test);
7733 PUT_MODE (ret, QImode);
7735 tmp = dest;
7736 tmpreg = dest;
7737 if (type == 0)
7739 tmp = gen_lowpart (QImode, dest);
7740 tmpreg = tmp;
7741 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
7743 else if (type == 1)
7745 if (!cse_not_expected)
7746 tmp = gen_reg_rtx (QImode);
7747 else
7748 tmp = gen_lowpart (QImode, dest);
7749 tmpreg = tmp;
7752 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
7753 if (bypass_test || second_test)
7755 rtx test = second_test;
7756 int bypass = 0;
7757 rtx tmp2 = gen_reg_rtx (QImode);
7758 if (bypass_test)
7760 if (second_test)
7761 abort ();
7762 test = bypass_test;
7763 bypass = 1;
7764 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
7766 PUT_MODE (test, QImode);
7767 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
7769 if (bypass)
7770 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
7771 else
7772 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
7775 if (type == 1)
7777 rtx clob;
7779 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
7780 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
7781 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7782 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7783 emit_insn (tmp);
7786 return 1; /* DONE */
7789 int
7790 ix86_expand_int_movcc (operands)
7791 rtx operands[];
7793 enum rtx_code code = GET_CODE (operands[1]), compare_code;
7794 rtx compare_seq, compare_op;
7795 rtx second_test, bypass_test;
7797 /* When the compare code is not LTU or GEU, we can not use the sbbl case.
7798 When the comparison is done with an immediate, we can convert it to LTU or
7799 GEU by altering the integer. */
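/* Editorial example: (LEU x 5) becomes (LTU x 6); the 0xffffffff
   exclusion below keeps the increment from wrapping the immediate.  */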
7801 if ((code == LEU || code == GTU)
7802 && GET_CODE (ix86_compare_op1) == CONST_INT
7803 && GET_MODE (operands[0]) != HImode
7804 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
7805 && GET_CODE (operands[2]) == CONST_INT
7806 && GET_CODE (operands[3]) == CONST_INT)
7808 if (code == LEU)
7809 code = LTU;
7810 else
7811 code = GEU;
7812 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
7815 start_sequence ();
7816 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
7817 compare_seq = gen_sequence ();
7818 end_sequence ();
7820 compare_code = GET_CODE (compare_op);
7822 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
7823 HImode insns, we'd be swallowed in word prefix ops. */
7825 if (GET_MODE (operands[0]) != HImode
7826 && GET_MODE (operands[0]) != DImode
7827 && GET_CODE (operands[2]) == CONST_INT
7828 && GET_CODE (operands[3]) == CONST_INT)
7830 rtx out = operands[0];
7831 HOST_WIDE_INT ct = INTVAL (operands[2]);
7832 HOST_WIDE_INT cf = INTVAL (operands[3]);
7833 HOST_WIDE_INT diff;
7835 if ((compare_code == LTU || compare_code == GEU)
7836 && !second_test && !bypass_test)
7839 /* Detect overlap between destination and compare sources. */
7840 rtx tmp = out;
7842 /* To simplify rest of code, restrict to the GEU case. */
7843 if (compare_code == LTU)
7845 int tmp = ct;
7846 ct = cf;
7847 cf = tmp;
7848 compare_code = reverse_condition (compare_code);
7849 code = reverse_condition (code);
7851 diff = ct - cf;
7853 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
7854 || reg_overlap_mentioned_p (out, ix86_compare_op1))
7855 tmp = gen_reg_rtx (SImode);
7857 emit_insn (compare_seq);
7858 emit_insn (gen_x86_movsicc_0_m1 (tmp));
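/* Editorial note: x86_movsicc_0_m1 emits an sbb of the destination
   with itself, which right after the compare yields -1 when the carry
   is set (op0 <u op1) and 0 otherwise; the arms below then turn that
   0 / -1 mask into the required ct / cf values with add/or/xor/and
   arithmetic.  */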
7860 if (diff == 1)
7863 * cmpl op0,op1
7864 * sbbl dest,dest
7865 * [addl dest, ct]
7867 * Size 5 - 8.
7869 if (ct)
7870 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
7872 else if (cf == -1)
7875 * cmpl op0,op1
7876 * sbbl dest,dest
7877 * orl $ct, dest
7879 * Size 8.
7881 emit_insn (gen_iorsi3 (tmp, tmp, GEN_INT (ct)));
7883 else if (diff == -1 && ct)
7886 * cmpl op0,op1
7887 * sbbl dest,dest
7888 * xorl $-1, dest
7889 * [addl dest, cf]
7891 * Size 8 - 11.
7893 emit_insn (gen_one_cmplsi2 (tmp, tmp));
7894 if (cf)
7895 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (cf)));
7897 else
7900 * cmpl op0,op1
7901 * sbbl dest,dest
7902 * andl cf - ct, dest
7903 * [addl dest, ct]
7905 * Size 8 - 11.
7907 emit_insn (gen_andsi3 (tmp, tmp, GEN_INT (trunc_int_for_mode
7908 (cf - ct, SImode))));
7909 if (ct)
7910 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
7913 if (tmp != out)
7914 emit_move_insn (out, tmp);
7916 return 1; /* DONE */
7919 diff = ct - cf;
7920 if (diff < 0)
7922 HOST_WIDE_INT tmp;
7923 tmp = ct, ct = cf, cf = tmp;
7924 diff = -diff;
7925 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
7927 /* We may be reversing an unordered compare to a normal compare, which
7928 is not valid in general (we may convert a non-trapping condition
7929 to a trapping one); however, on i386 we currently emit all
7930 comparisons unordered. */
7931 compare_code = reverse_condition_maybe_unordered (compare_code);
7932 code = reverse_condition_maybe_unordered (code);
7934 else
7936 compare_code = reverse_condition (compare_code);
7937 code = reverse_condition (code);
7940 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
7941 || diff == 3 || diff == 5 || diff == 9)
7944 * xorl dest,dest
7945 * cmpl op1,op2
7946 * setcc dest
7947 * lea cf(dest*(ct-cf)),dest
7949 * Size 14.
7951 * This also catches the degenerate setcc-only case.
7954 rtx tmp;
7955 int nops;
7957 out = emit_store_flag (out, code, ix86_compare_op0,
7958 ix86_compare_op1, VOIDmode, 0, 1);
7960 nops = 0;
7961 /* On x86_64 the lea instruction operates on Pmode, so we need to get the
7962 arithmetic done in the proper mode to match. */
7963 if (diff == 1)
7965 if (Pmode != SImode)
7966 tmp = gen_lowpart (Pmode, out);
7967 else
7968 tmp = out;
7970 else
7972 rtx out1;
7973 if (Pmode != SImode)
7974 out1 = gen_lowpart (Pmode, out);
7975 else
7976 out1 = out;
7977 tmp = gen_rtx_MULT (Pmode, out1, GEN_INT (diff & ~1));
7978 nops++;
7979 if (diff & 1)
7981 tmp = gen_rtx_PLUS (Pmode, tmp, out1);
7982 nops++;
7985 if (cf != 0)
7987 tmp = gen_rtx_PLUS (Pmode, tmp, GEN_INT (cf));
7988 nops++;
7990 if (tmp != out
7991 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
7993 if (Pmode != SImode)
7994 tmp = gen_rtx_SUBREG (SImode, tmp, 0);
7996 /* ??? We should take care of outputting non-lea arithmetic
7997 for the Pmode != SImode case too, but it is quite tricky and not
7998 too important, since all TARGET_64BIT machines support real
7999 conditional moves. */
8000 if (nops == 1 && Pmode == SImode)
8002 rtx clob;
8004 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8005 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8007 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8008 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8009 emit_insn (tmp);
8011 else
8012 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8014 if (out != operands[0])
8015 emit_move_insn (operands[0], out);
8017 return 1; /* DONE */
8021 * General case: Jumpful:
8022 * xorl dest,dest cmpl op1, op2
8023 * cmpl op1, op2 movl ct, dest
8024 * setcc dest jcc 1f
8025 * decl dest movl cf, dest
8026 * andl (cf-ct),dest 1:
8027 * addl ct,dest
8029 * Size 20. Size 14.
8031 * This is reasonably steep, but branch mispredict costs are
8032 * high on modern cpus, so consider failing only if optimizing
8033 * for space.
8035 * %%% Parameterize branch_cost on the tuning architecture, then
8036 * use that. The 80386 couldn't care less about mispredicts.
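/* Worked example (editorial): with ct = 5, cf = 2 the jumpless sequence
   computes setcc -> {1, 0}; decl -> {0, -1}; andl with cf - ct = -3 ->
   {0, -3}; addl ct -> {5, 2}, i.e. exactly {ct, cf}.  */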
8039 if (!optimize_size && !TARGET_CMOVE)
8041 if (ct == 0)
8043 ct = cf;
8044 cf = 0;
8045 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8047 /* We may be reversing an unordered compare to a normal compare,
8048 which is not valid in general (we may convert a non-trapping
8049 condition to a trapping one); however, on i386 we currently
8050 emit all comparisons unordered. */
8051 compare_code = reverse_condition_maybe_unordered (compare_code);
8052 code = reverse_condition_maybe_unordered (code);
8054 else
8056 compare_code = reverse_condition (compare_code);
8057 code = reverse_condition (code);
8061 out = emit_store_flag (out, code, ix86_compare_op0,
8062 ix86_compare_op1, VOIDmode, 0, 1);
8064 emit_insn (gen_addsi3 (out, out, constm1_rtx));
8065 emit_insn (gen_andsi3 (out, out, GEN_INT (trunc_int_for_mode
8066 (cf - ct, SImode))));
8067 if (ct != 0)
8068 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
8069 if (out != operands[0])
8070 emit_move_insn (operands[0], out);
8072 return 1; /* DONE */
8076 if (!TARGET_CMOVE)
8078 /* Try a few things more with specific constants and a variable. */
8080 optab op;
8081 rtx var, orig_out, out, tmp;
8083 if (optimize_size)
8084 return 0; /* FAIL */
8086 /* If one of the two operands is an interesting constant, load a
8087 constant with the above and mask it in with a logical operation. */
8089 if (GET_CODE (operands[2]) == CONST_INT)
8091 var = operands[3];
8092 if (INTVAL (operands[2]) == 0)
8093 operands[3] = constm1_rtx, op = and_optab;
8094 else if (INTVAL (operands[2]) == -1)
8095 operands[3] = const0_rtx, op = ior_optab;
8096 else
8097 return 0; /* FAIL */
8099 else if (GET_CODE (operands[3]) == CONST_INT)
8101 var = operands[2];
8102 if (INTVAL (operands[3]) == 0)
8103 operands[2] = constm1_rtx, op = and_optab;
8104 else if (INTVAL (operands[3]) == -1)
8105 operands[2] = const0_rtx, op = ior_optab;
8106 else
8107 return 0; /* FAIL */
8109 else
8110 return 0; /* FAIL */
8112 orig_out = operands[0];
8113 tmp = gen_reg_rtx (GET_MODE (orig_out));
8114 operands[0] = tmp;
8116 /* Recurse to get the constant loaded. */
8117 if (ix86_expand_int_movcc (operands) == 0)
8118 return 0; /* FAIL */
8120 /* Mask in the interesting variable. */
8121 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
8122 OPTAB_WIDEN);
8123 if (out != orig_out)
8124 emit_move_insn (orig_out, out);
8126 return 1; /* DONE */
8130 * For comparison with above,
8132 * movl cf,dest
8133 * movl ct,tmp
8134 * cmpl op1,op2
8135 * cmovcc tmp,dest
8137 * Size 15.
8140 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
8141 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
8142 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
8143 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
8145 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8147 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
8148 emit_move_insn (tmp, operands[3]);
8149 operands[3] = tmp;
8151 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8153 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
8154 emit_move_insn (tmp, operands[2]);
8155 operands[2] = tmp;
8157 if (! register_operand (operands[2], VOIDmode)
8158 && ! register_operand (operands[3], VOIDmode))
8159 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
8161 emit_insn (compare_seq);
8162 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8163 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8164 compare_op, operands[2],
8165 operands[3])));
8166 if (bypass_test)
8167 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8168 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8169 bypass_test,
8170 operands[3],
8171 operands[0])));
8172 if (second_test)
8173 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8174 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8175 second_test,
8176 operands[2],
8177 operands[0])));
8179 return 1; /* DONE */
8182 int
8183 ix86_expand_fp_movcc (operands)
8184 rtx operands[];
8186 enum rtx_code code;
8187 rtx tmp;
8188 rtx compare_op, second_test, bypass_test;
8190 /* For SF/DFmode conditional moves based on comparisons
8191 in the same mode, we may want to use SSE min/max instructions. */
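/* E.g. "r = (a < b) ? a : b" with SFmode operands can become a single
   "minss %xmm1, %xmm0" (illustrative; the exact insn depends on the
   mode and on which operand order survives the checks below). */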
8192 if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
8193 || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
8194 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
8195 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
8196 && (!TARGET_IEEE_FP
8197 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
8198 /* We may be called from the post-reload splitter. */
8199 && (!REG_P (operands[0])
8200 || SSE_REG_P (operands[0])
8201 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
8203 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
8204 code = GET_CODE (operands[1]);
8206 /* See if we have (cross) match between comparison operands and
8207 conditional move operands. */
8208 if (rtx_equal_p (operands[2], op1))
8210 rtx tmp = op0;
8211 op0 = op1;
8212 op1 = tmp;
8213 code = reverse_condition_maybe_unordered (code);
8215 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
8217 /* Check for min operation. */
8218 if (code == LT)
8220 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8221 if (memory_operand (op0, VOIDmode))
8222 op0 = force_reg (GET_MODE (operands[0]), op0);
8223 if (GET_MODE (operands[0]) == SFmode)
8224 emit_insn (gen_minsf3 (operands[0], op0, op1));
8225 else
8226 emit_insn (gen_mindf3 (operands[0], op0, op1));
8227 return 1;
8229 /* Check for max operation. */
8230 if (code == GT)
8232 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8233 if (memory_operand (op0, VOIDmode))
8234 op0 = force_reg (GET_MODE (operands[0]), op0);
8235 if (GET_MODE (operands[0]) == SFmode)
8236 emit_insn (gen_maxsf3 (operands[0], op0, op1));
8237 else
8238 emit_insn (gen_maxdf3 (operands[0], op0, op1));
8239 return 1;
8242 /* Arrange for the condition to be an sse_comparison_operator. In case we are
8243 in non-IEEE mode, try to canonicalize the destination operand
8244 to be first in the comparison - this helps reload avoid extra
8245 moves. */
8246 if (!sse_comparison_operator (operands[1], VOIDmode)
8247 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
8249 rtx tmp = ix86_compare_op0;
8250 ix86_compare_op0 = ix86_compare_op1;
8251 ix86_compare_op1 = tmp;
8252 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
8253 VOIDmode, ix86_compare_op0,
8254 ix86_compare_op1);
8256 /* Similarly, try to arrange for the result to be the first operand of the
8257 conditional move. We also don't support the NE comparison on SSE, so try
8258 to avoid it. */
8259 if ((rtx_equal_p (operands[0], operands[3])
8260 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
8261 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
8263 rtx tmp = operands[2];
8264 operands[2] = operands[3];
8265 operands[3] = tmp;
8266 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8267 (GET_CODE (operands[1])),
8268 VOIDmode, ix86_compare_op0,
8269 ix86_compare_op1);
8271 if (GET_MODE (operands[0]) == SFmode)
8272 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
8273 operands[2], operands[3],
8274 ix86_compare_op0, ix86_compare_op1));
8275 else
8276 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
8277 operands[2], operands[3],
8278 ix86_compare_op0, ix86_compare_op1));
8279 return 1;
8285 code = GET_CODE (operands[1]);
8286 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8288 /* The floating point conditional move instructions don't directly
8289 support signed integer comparisons. */
8291 if (!fcmov_comparison_operator (compare_op, VOIDmode))
8293 if (second_test != NULL || bypass_test != NULL)
8294 abort ();
8295 tmp = gen_reg_rtx (QImode);
8296 ix86_expand_setcc (code, tmp);
8297 code = NE;
8298 ix86_compare_op0 = tmp;
8299 ix86_compare_op1 = const0_rtx;
8300 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8302 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8304 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8305 emit_move_insn (tmp, operands[3]);
8306 operands[3] = tmp;
8308 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8310 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8311 emit_move_insn (tmp, operands[2]);
8312 operands[2] = tmp;
8315 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8316 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8317 compare_op,
8318 operands[2],
8319 operands[3])));
8320 if (bypass_test)
8321 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8322 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8323 bypass_test,
8324 operands[3],
8325 operands[0])));
8326 if (second_test)
8327 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8328 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8329 second_test,
8330 operands[2],
8331 operands[0])));
8333 return 1;
8336 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
8337 works for floating point parameters and non-offsettable memories.
8338 For pushes, it returns just stack offsets; the values will be saved
8339 in the right order. At most three parts are generated. */
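/* For example, on ia32 a DImode register is returned as two SImode
   hard registers, an offsettable XFmode memory as three SImode
   references at offsets 0, 4 and 8, and a DFmode constant as two
   SImode immediates (a sketch of the cases handled below). */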
8341 static int
8342 ix86_split_to_parts (operand, parts, mode)
8343 rtx operand;
8344 rtx *parts;
8345 enum machine_mode mode;
8347 int size;
8349 if (!TARGET_64BIT)
8350 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8351 else
8352 size = (GET_MODE_SIZE (mode) + 4) / 8;
8354 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8355 abort ();
8356 if (size < 2 || size > 3)
8357 abort ();
8359 /* Optimize constant pool references into immediates. This is used by fp moves
8360 that force all constants to memory to allow combining. */
8362 if (GET_CODE (operand) == MEM
8363 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8364 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8365 operand = get_pool_constant (XEXP (operand, 0));
8367 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
8369 /* The only non-offsettable memories we handle are pushes. */
8370 if (! push_operand (operand, VOIDmode))
8371 abort ();
8373 operand = copy_rtx (operand);
8374 PUT_MODE (operand, Pmode);
8375 parts[0] = parts[1] = parts[2] = operand;
8377 else if (!TARGET_64BIT)
8379 if (mode == DImode)
8380 split_di (&operand, 1, &parts[0], &parts[1]);
8381 else
8383 if (REG_P (operand))
8385 if (!reload_completed)
8386 abort ();
8387 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8388 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8389 if (size == 3)
8390 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8392 else if (offsettable_memref_p (operand))
8394 operand = adjust_address (operand, SImode, 0);
8395 parts[0] = operand;
8396 parts[1] = adjust_address (operand, SImode, 4);
8397 if (size == 3)
8398 parts[2] = adjust_address (operand, SImode, 8);
8400 else if (GET_CODE (operand) == CONST_DOUBLE)
8402 REAL_VALUE_TYPE r;
8403 long l[4];
8405 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8406 switch (mode)
8408 case XFmode:
8409 case TFmode:
8410 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8411 parts[2] = GEN_INT (l[2]);
8412 break;
8413 case DFmode:
8414 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8415 break;
8416 default:
8417 abort ();
8419 parts[1] = GEN_INT (l[1]);
8420 parts[0] = GEN_INT (l[0]);
8422 else
8423 abort ();
8426 else
8428 if (mode == XFmode || mode == TFmode)
8430 if (REG_P (operand))
8432 if (!reload_completed)
8433 abort ();
8434 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8435 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8437 else if (offsettable_memref_p (operand))
8439 operand = adjust_address (operand, DImode, 0);
8440 parts[0] = operand;
8441 parts[1] = adjust_address (operand, SImode, 8);
8443 else if (GET_CODE (operand) == CONST_DOUBLE)
8445 REAL_VALUE_TYPE r;
8446 long l[3];
8448 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8449 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8450 /* Do not use shift by 32 to avoid warning on 32bit systems. */
8451 if (HOST_BITS_PER_WIDE_INT >= 64)
8452 parts[0] = GEN_INT (l[0] + ((l[1] << 31) << 1));
8453 else
8454 parts[0] = immed_double_const (l[0], l[1], DImode);
8455 parts[1] = GEN_INT (l[2]);
8457 else
8458 abort ();
8462 return size;
8465 /* Emit insns to perform a move or push of DI, DF, and XF values.
8466 Operands 2-4 contain the input values in the correct order;
8467 operands 5-7 contain the output values. */
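/* For example, a DImode memory-to-register move on ia32 is emitted as
   two SImode moves, low part first, unless the destination registers
   are needed to address the source, in which case the order is swapped
   or an lea is emitted first (a summary of the collision handling
   below). */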
8470 void
8471 ix86_split_long_move (operands)
8472 rtx operands[];
8474 rtx part[2][3];
8475 int nparts;
8476 int push = 0;
8477 int collisions = 0;
8478 enum machine_mode mode = GET_MODE (operands[0]);
8480 /* The DFmode expanders may ask us to move a double.
8481 For a 64-bit target this is a single move. By hiding the fact
8482 here we simplify the i386.md splitters. */
8483 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8485 /* Optimize constant pool references into immediates. This is used by fp moves
8486 that force all constants to memory to allow combining. */
8488 if (GET_CODE (operands[1]) == MEM
8489 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8490 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8491 operands[1] = get_pool_constant (XEXP (operands[1], 0));
8492 if (push_operand (operands[0], VOIDmode))
8494 operands[0] = copy_rtx (operands[0]);
8495 PUT_MODE (operands[0], Pmode);
8497 else
8498 operands[0] = gen_lowpart (DImode, operands[0]);
8499 operands[1] = gen_lowpart (DImode, operands[1]);
8500 emit_move_insn (operands[0], operands[1]);
8501 return;
8504 /* The only non-offsettable memory we handle is a push. */
8505 if (push_operand (operands[0], VOIDmode))
8506 push = 1;
8507 else if (GET_CODE (operands[0]) == MEM
8508 && ! offsettable_memref_p (operands[0]))
8509 abort ();
8511 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8512 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
8514 /* When emitting a push, take care of source operands on the stack. */
8515 if (push && GET_CODE (operands[1]) == MEM
8516 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
8518 if (nparts == 3)
8519 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8520 XEXP (part[1][2], 0));
8521 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8522 XEXP (part[1][1], 0));
8525 /* We need to do the copy in the right order in case an address register
8526 of the source overlaps the destination. */
8527 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
8529 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
8530 collisions++;
8531 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8532 collisions++;
8533 if (nparts == 3
8534 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
8535 collisions++;
8537 /* Collision in the middle part can be handled by reordering. */
8538 if (collisions == 1 && nparts == 3
8539 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8541 rtx tmp;
8542 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
8543 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
8546 /* If there are more collisions, we can't handle them by reordering.
8547 Do an lea to the last part and use only one colliding move. */
8548 else if (collisions > 1)
8550 collisions = 1;
8551 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
8552 XEXP (part[1][0], 0)));
8553 part[1][0] = change_address (part[1][0],
8554 TARGET_64BIT ? DImode : SImode,
8555 part[0][nparts - 1]);
8556 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
8557 if (nparts == 3)
8558 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
8562 if (push)
8564 if (!TARGET_64BIT)
8566 if (nparts == 3)
8568 /* We use only the first 12 bytes of the TFmode value, but for pushing we
8569 are required to adjust the stack as if we were pushing a real 16-byte
8570 value. */
8571 if (mode == TFmode && !TARGET_64BIT)
8572 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
8573 GEN_INT (-4)));
8574 emit_move_insn (part[0][2], part[1][2]);
8577 else
8579 /* In 64-bit mode we don't have a 32-bit push available. In case this is a
8580 register, it is OK - we will just use the larger counterpart. We also
8581 retype memory - this comes from an attempt to avoid a REX prefix on
8582 moving the second half of a TFmode value. */
8583 if (GET_MODE (part[1][1]) == SImode)
8585 if (GET_CODE (part[1][1]) == MEM)
8586 part[1][1] = adjust_address (part[1][1], DImode, 0);
8587 else if (REG_P (part[1][1]))
8588 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
8589 else
8590 abort ();
8591 if (GET_MODE (part[1][0]) == SImode)
8592 part[1][0] = part[1][1];
8595 emit_move_insn (part[0][1], part[1][1]);
8596 emit_move_insn (part[0][0], part[1][0]);
8597 return;
8600 /* Choose the correct order so as not to overwrite the source before it is copied. */
8601 if ((REG_P (part[0][0])
8602 && REG_P (part[1][1])
8603 && (REGNO (part[0][0]) == REGNO (part[1][1])
8604 || (nparts == 3
8605 && REGNO (part[0][0]) == REGNO (part[1][2]))))
8606 || (collisions > 0
8607 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
8609 if (nparts == 3)
8611 operands[2] = part[0][2];
8612 operands[3] = part[0][1];
8613 operands[4] = part[0][0];
8614 operands[5] = part[1][2];
8615 operands[6] = part[1][1];
8616 operands[7] = part[1][0];
8618 else
8620 operands[2] = part[0][1];
8621 operands[3] = part[0][0];
8622 operands[5] = part[1][1];
8623 operands[6] = part[1][0];
8626 else
8628 if (nparts == 3)
8630 operands[2] = part[0][0];
8631 operands[3] = part[0][1];
8632 operands[4] = part[0][2];
8633 operands[5] = part[1][0];
8634 operands[6] = part[1][1];
8635 operands[7] = part[1][2];
8637 else
8639 operands[2] = part[0][0];
8640 operands[3] = part[0][1];
8641 operands[5] = part[1][0];
8642 operands[6] = part[1][1];
8645 emit_move_insn (operands[2], operands[5]);
8646 emit_move_insn (operands[3], operands[6]);
8647 if (nparts == 3)
8648 emit_move_insn (operands[4], operands[7]);
8650 return;
8653 void
8654 ix86_split_ashldi (operands, scratch)
8655 rtx *operands, scratch;
8657 rtx low[2], high[2];
8658 int count;
8660 if (GET_CODE (operands[2]) == CONST_INT)
8662 split_di (operands, 2, low, high);
8663 count = INTVAL (operands[2]) & 63;
8665 if (count >= 32)
8667 emit_move_insn (high[0], low[1]);
8668 emit_move_insn (low[0], const0_rtx);
8670 if (count > 32)
8671 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
8673 else
8675 if (!rtx_equal_p (operands[0], operands[1]))
8676 emit_move_insn (operands[0], operands[1]);
8677 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
8678 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
8681 else
8683 if (!rtx_equal_p (operands[0], operands[1]))
8684 emit_move_insn (operands[0], operands[1]);
8686 split_di (operands, 1, low, high);
8688 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
8689 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
8691 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8693 if (! no_new_pseudos)
8694 scratch = force_reg (SImode, const0_rtx);
8695 else
8696 emit_move_insn (scratch, const0_rtx);
8698 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
8699 scratch));
8701 else
8702 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
8706 void
8707 ix86_split_ashrdi (operands, scratch)
8708 rtx *operands, scratch;
8710 rtx low[2], high[2];
8711 int count;
8713 if (GET_CODE (operands[2]) == CONST_INT)
8715 split_di (operands, 2, low, high);
8716 count = INTVAL (operands[2]) & 63;
8718 if (count >= 32)
8720 emit_move_insn (low[0], high[1]);
8722 if (! reload_completed)
8723 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
8724 else
8726 emit_move_insn (high[0], low[0]);
8727 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
8730 if (count > 32)
8731 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
8733 else
8735 if (!rtx_equal_p (operands[0], operands[1]))
8736 emit_move_insn (operands[0], operands[1]);
8737 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8738 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
8741 else
8743 if (!rtx_equal_p (operands[0], operands[1]))
8744 emit_move_insn (operands[0], operands[1]);
8746 split_di (operands, 1, low, high);
8748 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8749 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
8751 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8753 if (! no_new_pseudos)
8754 scratch = gen_reg_rtx (SImode);
8755 emit_move_insn (scratch, high[0]);
8756 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
8757 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8758 scratch));
8760 else
8761 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
8765 void
8766 ix86_split_lshrdi (operands, scratch)
8767 rtx *operands, scratch;
8769 rtx low[2], high[2];
8770 int count;
8772 if (GET_CODE (operands[2]) == CONST_INT)
8774 split_di (operands, 2, low, high);
8775 count = INTVAL (operands[2]) & 63;
8777 if (count >= 32)
8779 emit_move_insn (low[0], high[1]);
8780 emit_move_insn (high[0], const0_rtx);
8782 if (count > 32)
8783 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
8785 else
8787 if (!rtx_equal_p (operands[0], operands[1]))
8788 emit_move_insn (operands[0], operands[1]);
8789 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8790 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
8793 else
8795 if (!rtx_equal_p (operands[0], operands[1]))
8796 emit_move_insn (operands[0], operands[1]);
8798 split_di (operands, 1, low, high);
8800 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8801 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
8803 /* Heh. By reversing the arguments, we can reuse this pattern. */
8804 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8806 if (! no_new_pseudos)
8807 scratch = force_reg (SImode, const0_rtx);
8808 else
8809 emit_move_insn (scratch, const0_rtx);
8811 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8812 scratch));
8814 else
8815 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
8819 /* Helper function for the string operations below. Test whether the VALUE
8820 bits of VARIABLE are clear (i.e. it is suitably aligned); if so, jump to
the returned label. */
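/* The emitted code looks roughly like (ia32, VALUE == 1):

       testl $1, %edi     ; low bit of the address set?
       jz    1f           ; no - already aligned, skip the fixup
       ...                ; caller emits the one-byte fixup here
   1:

   except that the test is actually done with an AND into a scratch
   register, as below. */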
8821 static rtx
8822 ix86_expand_aligntest (variable, value)
8823 rtx variable;
8824 int value;
8826 rtx label = gen_label_rtx ();
8827 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
8828 if (GET_MODE (variable) == DImode)
8829 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
8830 else
8831 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
8832 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
8833 1, 0, label);
8834 return label;
8837 /* Adjust COUNTER by the VALUE. */
8838 static void
8839 ix86_adjust_counter (countreg, value)
8840 rtx countreg;
8841 HOST_WIDE_INT value;
8843 if (GET_MODE (countreg) == DImode)
8844 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
8845 else
8846 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
8849 /* Zero extend possibly SImode EXP to Pmode register. */
8850 rtx
8851 ix86_zero_extend_to_Pmode (exp)
8852 rtx exp;
8854 rtx r;
8855 if (GET_MODE (exp) == VOIDmode)
8856 return force_reg (Pmode, exp);
8857 if (GET_MODE (exp) == Pmode)
8858 return copy_to_mode_reg (Pmode, exp);
8859 r = gen_reg_rtx (Pmode);
8860 emit_insn (gen_zero_extendsidi2 (r, exp));
8861 return r;
8864 /* Expand string move (memcpy) operation. Use i386 string operations when
8865 profitable. expand_clrstr contains similar code. */
8866 int
8867 ix86_expand_movstr (dst, src, count_exp, align_exp)
8868 rtx dst, src, count_exp, align_exp;
8870 rtx srcreg, destreg, countreg;
8871 enum machine_mode counter_mode;
8872 HOST_WIDE_INT align = 0;
8873 unsigned HOST_WIDE_INT count = 0;
8874 rtx insns;
8876 start_sequence ();
8878 if (GET_CODE (align_exp) == CONST_INT)
8879 align = INTVAL (align_exp);
8881 /* This simple hack avoids all inlining code and simplifies the code below. */
8882 if (!TARGET_ALIGN_STRINGOPS)
8883 align = 64;
8885 if (GET_CODE (count_exp) == CONST_INT)
8886 count = INTVAL (count_exp);
8888 /* Figure out the proper mode for the counter. For 32-bit targets it is
8889 always SImode; for 64-bit targets use SImode when possible, otherwise
8890 DImode. Set count to the number of bytes copied when known at compile time. */
8891 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
8892 || x86_64_zero_extended_value (count_exp))
8893 counter_mode = SImode;
8894 else
8895 counter_mode = DImode;
8897 if (counter_mode != SImode && counter_mode != DImode)
8898 abort ();
8900 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
8901 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
8903 emit_insn (gen_cld ());
8905 /* When optimizing for size, emit a simple rep ; movsb instruction for
8906 counts not divisible by 4. */
8908 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
8910 countreg = ix86_zero_extend_to_Pmode (count_exp);
8911 if (TARGET_64BIT)
8912 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
8913 destreg, srcreg, countreg));
8914 else
8915 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
8916 destreg, srcreg, countreg));
8919 /* For constant aligned (or small unaligned) copies use rep movsl
8920 followed by code copying the rest. For PentiumPro, ensure 8-byte
8921 alignment to allow rep movsl acceleration. */
8923 else if (count != 0
8924 && (align >= 8
8925 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
8926 || optimize_size || count < (unsigned int)64))
8928 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
8929 if (count & ~(size - 1))
8931 countreg = copy_to_mode_reg (counter_mode,
8932 GEN_INT ((count >> (size == 4 ? 2 : 3))
8933 & (TARGET_64BIT ? -1 : 0x3fffffff)));
8934 countreg = ix86_zero_extend_to_Pmode (countreg);
8935 if (size == 4)
8937 if (TARGET_64BIT)
8938 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
8939 destreg, srcreg, countreg));
8940 else
8941 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
8942 destreg, srcreg, countreg));
8944 else
8945 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
8946 destreg, srcreg, countreg));
8948 if (size == 8 && (count & 0x04))
8949 emit_insn (gen_strmovsi (destreg, srcreg));
8950 if (count & 0x02)
8951 emit_insn (gen_strmovhi (destreg, srcreg));
8952 if (count & 0x01)
8953 emit_insn (gen_strmovqi (destreg, srcreg));
8955 /* The generic code based on the glibc implementation:
8956 - align destination to 4 bytes (8-byte alignment is used for PentiumPro,
8957 allowing accelerated copying there)
8958 - copy the data using rep movsl
8959 - copy the rest. */
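/* A rough sketch of the result on ia32 with unknown alignment (each
   prologue/epilogue test below is emitted only when needed):

       cmpl  $3, count ; jbe 9f    ; tiny copies go straight to the tail
       ( movsb / movsw fixups until the destination is 4-byte aligned )
       ( word count = count >> 2 )
       rep movsl                   ; bulk copy
   9:  ( movsw / movsb fixups for the remaining count & 3 bytes )  */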
8960 else
8962 rtx countreg2;
8963 rtx label = NULL;
8965 /* In case we don't know anything about the alignment, default to the
8966 library version, since it is usually equally fast and results in
8967 shorter code. */
8968 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
8970 end_sequence ();
8971 return 0;
8974 if (TARGET_SINGLE_STRINGOP)
8975 emit_insn (gen_cld ());
8977 countreg2 = gen_reg_rtx (Pmode);
8978 countreg = copy_to_mode_reg (counter_mode, count_exp);
8980 /* We don't use loops to align the destination and to copy parts smaller
8981 than 4 bytes, because gcc is able to optimize such code better (in
8982 the case the destination or the count really is aligned, gcc is often
8983 able to predict the branches) and also it is friendlier to the
8984 hardware branch prediction.
8986 Using loops is beneficial for the generic case, because we can
8987 handle small counts using the loops. Many CPUs (such as Athlon)
8988 have large REP prefix setup costs.
8990 This is quite costly. Maybe we can revisit this decision later or
8991 add some customizability to this code. */
8993 if (count == 0
8994 && align < (TARGET_PENTIUMPRO && (count == 0
8995 || count >= (unsigned int)260)
8996 ? 8 : UNITS_PER_WORD))
8998 label = gen_label_rtx ();
8999 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9000 LEU, 0, counter_mode, 1, 0, label);
9002 if (align <= 1)
9004 rtx label = ix86_expand_aligntest (destreg, 1);
9005 emit_insn (gen_strmovqi (destreg, srcreg));
9006 ix86_adjust_counter (countreg, 1);
9007 emit_label (label);
9008 LABEL_NUSES (label) = 1;
9010 if (align <= 2)
9012 rtx label = ix86_expand_aligntest (destreg, 2);
9013 emit_insn (gen_strmovhi (destreg, srcreg));
9014 ix86_adjust_counter (countreg, 2);
9015 emit_label (label);
9016 LABEL_NUSES (label) = 1;
9018 if (align <= 4
9019 && ((TARGET_PENTIUMPRO && (count == 0
9020 || count >= (unsigned int)260))
9021 || TARGET_64BIT))
9023 rtx label = ix86_expand_aligntest (destreg, 4);
9024 emit_insn (gen_strmovsi (destreg, srcreg));
9025 ix86_adjust_counter (countreg, 4);
9026 emit_label (label);
9027 LABEL_NUSES (label) = 1;
9030 if (!TARGET_SINGLE_STRINGOP)
9031 emit_insn (gen_cld ());
9032 if (TARGET_64BIT)
9034 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9035 GEN_INT (3)));
9036 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9037 destreg, srcreg, countreg2));
9039 else
9041 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9042 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9043 destreg, srcreg, countreg2));
9046 if (label)
9048 emit_label (label);
9049 LABEL_NUSES (label) = 1;
9051 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9052 emit_insn (gen_strmovsi (destreg, srcreg));
9053 if ((align <= 4 || count == 0) && TARGET_64BIT)
9055 rtx label = ix86_expand_aligntest (countreg, 4);
9056 emit_insn (gen_strmovsi (destreg, srcreg));
9057 emit_label (label);
9058 LABEL_NUSES (label) = 1;
9060 if (align > 2 && count != 0 && (count & 2))
9061 emit_insn (gen_strmovhi (destreg, srcreg));
9062 if (align <= 2 || count == 0)
9064 rtx label = ix86_expand_aligntest (countreg, 2);
9065 emit_insn (gen_strmovhi (destreg, srcreg));
9066 emit_label (label);
9067 LABEL_NUSES (label) = 1;
9069 if (align > 1 && count != 0 && (count & 1))
9070 emit_insn (gen_strmovqi (destreg, srcreg));
9071 if (align <= 1 || count == 0)
9073 rtx label = ix86_expand_aligntest (countreg, 1);
9074 emit_insn (gen_strmovqi (destreg, srcreg));
9075 emit_label (label);
9076 LABEL_NUSES (label) = 1;
9080 insns = get_insns ();
9081 end_sequence ();
9083 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9084 emit_insns (insns);
9085 return 1;
9088 /* Expand string clear operation (bzero). Use i386 string operations when
9089 profitable. expand_movstr contains similar code. */
9090 int
9091 ix86_expand_clrstr (src, count_exp, align_exp)
9092 rtx src, count_exp, align_exp;
9094 rtx destreg, zeroreg, countreg;
9095 enum machine_mode counter_mode;
9096 HOST_WIDE_INT align = 0;
9097 unsigned HOST_WIDE_INT count = 0;
9099 if (GET_CODE (align_exp) == CONST_INT)
9100 align = INTVAL (align_exp);
9102 /* This simple hack avoids all inlining code and simplifies the code below. */
9103 if (!TARGET_ALIGN_STRINGOPS)
9104 align = 32;
9106 if (GET_CODE (count_exp) == CONST_INT)
9107 count = INTVAL (count_exp);
9108 /* Figure out the proper mode for the counter. For 32-bit targets it is
9109 always SImode; for 64-bit targets use SImode when possible, otherwise
9110 DImode. Set count to the number of bytes cleared when known at compile time. */
9111 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9112 || x86_64_zero_extended_value (count_exp))
9113 counter_mode = SImode;
9114 else
9115 counter_mode = DImode;
9117 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9119 emit_insn (gen_cld ());
9121 /* When optimizing for size, emit a simple rep ; stosb instruction for
9122 counts not divisible by 4. */
9124 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9126 countreg = ix86_zero_extend_to_Pmode (count_exp);
9127 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9128 if (TARGET_64BIT)
9129 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9130 destreg, countreg));
9131 else
9132 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9133 destreg, countreg));
9135 else if (count != 0
9136 && (align >= 8
9137 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9138 || optimize_size || count < (unsigned int)64))
9140 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9141 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9142 if (count & ~(size - 1))
9144 countreg = copy_to_mode_reg (counter_mode,
9145 GEN_INT ((count >> (size == 4 ? 2 : 3))
9146 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9147 countreg = ix86_zero_extend_to_Pmode (countreg);
9148 if (size == 4)
9150 if (TARGET_64BIT)
9151 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9152 destreg, countreg));
9153 else
9154 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9155 destreg, countreg));
9157 else
9158 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9159 destreg, countreg));
9161 if (size == 8 && (count & 0x04))
9162 emit_insn (gen_strsetsi (destreg,
9163 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9164 if (count & 0x02)
9165 emit_insn (gen_strsethi (destreg,
9166 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9167 if (count & 0x01)
9168 emit_insn (gen_strsetqi (destreg,
9169 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9171 else
9173 rtx countreg2;
9174 rtx label = NULL;
9176 /* In case we don't know anything about the alignment, default to the
9177 library version, since it is usually equally fast and results in
9178 shorter code. */
9179 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9180 return 0;
9182 if (TARGET_SINGLE_STRINGOP)
9183 emit_insn (gen_cld ());
9185 countreg2 = gen_reg_rtx (Pmode);
9186 countreg = copy_to_mode_reg (counter_mode, count_exp);
9187 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9189 if (count == 0
9190 && align < (TARGET_PENTIUMPRO && (count == 0
9191 || count >= (unsigned int)260)
9192 ? 8 : UNITS_PER_WORD))
9194 label = gen_label_rtx ();
9195 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9196 LEU, 0, counter_mode, 1, 0, label);
9198 if (align <= 1)
9200 rtx label = ix86_expand_aligntest (destreg, 1);
9201 emit_insn (gen_strsetqi (destreg,
9202 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9203 ix86_adjust_counter (countreg, 1);
9204 emit_label (label);
9205 LABEL_NUSES (label) = 1;
9207 if (align <= 2)
9209 rtx label = ix86_expand_aligntest (destreg, 2);
9210 emit_insn (gen_strsethi (destreg,
9211 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9212 ix86_adjust_counter (countreg, 2);
9213 emit_label (label);
9214 LABEL_NUSES (label) = 1;
9216 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
9217 || count >= (unsigned int)260))
9219 rtx label = ix86_expand_aligntest (destreg, 4);
9220 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9221 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9222 : zeroreg)));
9223 ix86_adjust_counter (countreg, 4);
9224 emit_label (label);
9225 LABEL_NUSES (label) = 1;
9228 if (!TARGET_SINGLE_STRINGOP)
9229 emit_insn (gen_cld ());
9230 if (TARGET_64BIT)
9232 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9233 GEN_INT (3)));
9234 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9235 destreg, countreg2));
9237 else
9239 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9240 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9241 destreg, countreg2));
9244 if (label)
9246 emit_label (label);
9247 LABEL_NUSES (label) = 1;
9249 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9250 emit_insn (gen_strsetsi (destreg,
9251 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9252 if (TARGET_64BIT && (align <= 4 || count == 0))
9254 rtx label = ix86_expand_aligntest (countreg, 4);
9255 emit_insn (gen_strsetsi (destreg,
9256 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9257 emit_label (label);
9258 LABEL_NUSES (label) = 1;
9260 if (align > 2 && count != 0 && (count & 2))
9261 emit_insn (gen_strsethi (destreg,
9262 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9263 if (align <= 2 || count == 0)
9265 rtx label = ix86_expand_aligntest (countreg, 2);
9266 emit_insn (gen_strsethi (destreg,
9267 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9268 emit_label (label);
9269 LABEL_NUSES (label) = 1;
9271 if (align > 1 && count != 0 && (count & 1))
9272 emit_insn (gen_strsetqi (destreg,
9273 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9274 if (align <= 1 || count == 0)
9276 rtx label = ix86_expand_aligntest (countreg, 1);
9277 emit_insn (gen_strsetqi (destreg,
9278 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9279 emit_label (label);
9280 LABEL_NUSES (label) = 1;
9283 return 1;
9285 /* Expand strlen. */
9286 int
9287 ix86_expand_strlen (out, src, eoschar, align)
9288 rtx out, src, eoschar, align;
9290 rtx addr, scratch1, scratch2, scratch3, scratch4;
9292 /* The generic case of the strlen expander is long. Avoid expanding it
9293 unless TARGET_INLINE_ALL_STRINGOPS. */
9295 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9296 && !TARGET_INLINE_ALL_STRINGOPS
9297 && !optimize_size
9298 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
9299 return 0;
9301 addr = force_reg (Pmode, XEXP (src, 0));
9302 scratch1 = gen_reg_rtx (Pmode);
9304 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9305 && !optimize_size)
9307 /* Well, it seems that some optimizer does not combine a call like
9308 foo (strlen (bar), strlen (bar));
9309 when the move and the subtraction are done here. It does calculate
9310 the length just once when these instructions are done inside
9311 output_strlen_unroll(). But I think that since &bar[strlen (bar)] is
9312 often used and I use one fewer register for the lifetime of
9313 output_strlen_unroll(), this is better. */
9315 emit_move_insn (out, addr);
9317 ix86_expand_strlensi_unroll_1 (out, align);
9319 /* strlensi_unroll_1 returns the address of the zero at the end of
9320 the string, like memchr(), so compute the length by subtracting
9321 the start address. */
9322 if (TARGET_64BIT)
9323 emit_insn (gen_subdi3 (out, out, addr));
9324 else
9325 emit_insn (gen_subsi3 (out, out, addr));
9327 else
9329 scratch2 = gen_reg_rtx (Pmode);
9330 scratch3 = gen_reg_rtx (Pmode);
9331 scratch4 = force_reg (Pmode, constm1_rtx);
9333 emit_move_insn (scratch3, addr);
9334 eoschar = force_reg (QImode, eoschar);
9336 emit_insn (gen_cld ());
9337 if (TARGET_64BIT)
9339 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
9340 align, scratch4, scratch3));
9341 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
9342 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
9344 else
9346 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
9347 align, scratch4, scratch3));
9348 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
9349 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
9352 return 1;
9355 /* Expand the appropriate insns for doing strlen if not just doing
9356 repnz; scasb
9358 out = result, initialized with the start address
9359 align_rtx = alignment of the address.
9360 scratch = scratch register, initialized with the start address when
9361 not aligned, otherwise undefined
9363 This is just the body. It needs the initializations mentioned above and
9364 some address computation at the end. These things are done in i386.md. */
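/* Sketch of the generated body for unknown alignment (pseudo-asm; OUT,
   SCRATCH and TMP stand for the pseudos used below):

       ( up to three byte compares advance OUT to a 4-byte boundary )
   .Lword:
       movl  (OUT), SCRATCH
       addl  $4, OUT
       leal  -0x01010101(SCRATCH), TMP
       notl  SCRATCH ; andl SCRATCH, TMP
       andl  $0x80808080, TMP
       jz    .Lword               ; no zero byte in this word
       ( locate the zero byte within TMP and back OUT up accordingly )  */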
9366 static void
9367 ix86_expand_strlensi_unroll_1 (out, align_rtx)
9368 rtx out, align_rtx;
9370 int align;
9371 rtx tmp;
9372 rtx align_2_label = NULL_RTX;
9373 rtx align_3_label = NULL_RTX;
9374 rtx align_4_label = gen_label_rtx ();
9375 rtx end_0_label = gen_label_rtx ();
9376 rtx mem;
9377 rtx tmpreg = gen_reg_rtx (SImode);
9378 rtx scratch = gen_reg_rtx (SImode);
9380 align = 0;
9381 if (GET_CODE (align_rtx) == CONST_INT)
9382 align = INTVAL (align_rtx);
9384 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
9386 /* Is there a known alignment and is it less than 4? */
9387 if (align < 4)
9389 rtx scratch1 = gen_reg_rtx (Pmode);
9390 emit_move_insn (scratch1, out);
9391 /* Is there a known alignment and is it not 2? */
9392 if (align != 2)
9394 align_3_label = gen_label_rtx (); /* Label when address % 4 == 3 */
9395 align_2_label = gen_label_rtx (); /* Label when address % 4 == 2 */
9397 /* Leave just the 3 lower bits. */
9398 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
9399 NULL_RTX, 0, OPTAB_WIDEN);
9401 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9402 Pmode, 1, 0, align_4_label);
9403 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
9404 Pmode, 1, 0, align_2_label);
9405 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
9406 Pmode, 1, 0, align_3_label);
9408 else
9410 /* Since the alignment is 2, we have to check 0 or 2 bytes;
9411 check whether the address is aligned to a 4-byte boundary. */
9413 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
9414 NULL_RTX, 0, OPTAB_WIDEN);
9416 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9417 Pmode, 1, 0, align_4_label);
9420 mem = gen_rtx_MEM (QImode, out);
9422 /* Now compare the bytes. */
9424 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
9425 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9426 QImode, 1, 0, end_0_label);
9428 /* Increment the address. */
9429 if (TARGET_64BIT)
9430 emit_insn (gen_adddi3 (out, out, const1_rtx));
9431 else
9432 emit_insn (gen_addsi3 (out, out, const1_rtx));
9434 /* Not needed with an alignment of 2 */
9435 if (align != 2)
9437 emit_label (align_2_label);
9439 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9440 QImode, 1, 0, end_0_label);
9442 if (TARGET_64BIT)
9443 emit_insn (gen_adddi3 (out, out, const1_rtx));
9444 else
9445 emit_insn (gen_addsi3 (out, out, const1_rtx));
9447 emit_label (align_3_label);
9450 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9451 QImode, 1, 0, end_0_label);
9453 if (TARGET_64BIT)
9454 emit_insn (gen_adddi3 (out, out, const1_rtx));
9455 else
9456 emit_insn (gen_addsi3 (out, out, const1_rtx));
9459 /* Generate a loop to check 4 bytes at a time. It is not a good idea
9460 to align this loop; it only enlarges the program and does not
9461 help to speed it up. */
9462 emit_label (align_4_label);
9464 mem = gen_rtx_MEM (SImode, out);
9465 emit_move_insn (scratch, mem);
9466 if (TARGET_64BIT)
9467 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
9468 else
9469 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
9471 /* This formula yields a nonzero result iff one of the bytes is zero.
9472 This saves three branches inside the loop and many cycles. */
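/* In C terms the loop computes t = (x - 0x01010101) & ~x & 0x80808080,
   which is nonzero iff some byte of x is zero: a byte keeps its 0x80
   bit only when it was 0x00, so that the subtraction borrows to 0xff
   while the complement is 0xff as well.  Worked example for
   x = 0x00616263:

       x - 0x01010101     = 0xff606162
       ... & ~x           = 0xff000100
       ... & 0x80808080   = 0x80000000   -> a zero byte exists

   Borrows can misfire only above the lowest zero byte, which is
   harmless for finding the end of a string. */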
9474 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
9475 emit_insn (gen_one_cmplsi2 (scratch, scratch));
9476 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
9477 emit_insn (gen_andsi3 (tmpreg, tmpreg,
9478 GEN_INT (trunc_int_for_mode
9479 (0x80808080, SImode))));
9480 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
9481 SImode, 1, 0, align_4_label);
9483 if (TARGET_CMOVE)
9485 rtx reg = gen_reg_rtx (SImode);
9486 rtx reg2 = gen_reg_rtx (Pmode);
9487 emit_move_insn (reg, tmpreg);
9488 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
9490 /* If zero is not in the first two bytes, move two bytes forward. */
9491 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9492 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9493 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9494 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
9495 gen_rtx_IF_THEN_ELSE (SImode, tmp,
9496 reg,
9497 tmpreg)));
9498 /* Emit lea manually to avoid clobbering of flags. */
9499 emit_insn (gen_rtx_SET (SImode, reg2,
9500 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
9502 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9503 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9504 emit_insn (gen_rtx_SET (VOIDmode, out,
9505 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
9506 reg2,
9507 out)));
9510 else
9512 rtx end_2_label = gen_label_rtx ();
9513 /* Is zero in the first two bytes? */
9515 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9516 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9517 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
9518 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9519 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
9520 pc_rtx);
9521 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9522 JUMP_LABEL (tmp) = end_2_label;
9524 /* Not in the first two. Move two bytes forward. */
9525 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
9526 if (TARGET_64BIT)
9527 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
9528 else
9529 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
9531 emit_label (end_2_label);
9535 /* Avoid a branch in fixing up the byte position. */
9536 tmpreg = gen_lowpart (QImode, tmpreg);
9537 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
9538 if (TARGET_64BIT)
9539 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
9540 else
9541 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
9543 emit_label (end_0_label);
9546 /* Clear stack slot assignments remembered from previous functions.
9547 This is called from INIT_EXPANDERS once before RTL is emitted for each
9548 function. */
9550 static void
9551 ix86_init_machine_status (p)
9552 struct function *p;
9554 p->machine = (struct machine_function *)
9555 xcalloc (1, sizeof (struct machine_function));
9558 /* Mark machine specific bits of P for GC. */
9559 static void
9560 ix86_mark_machine_status (p)
9561 struct function *p;
9563 struct machine_function *machine = p->machine;
9564 enum machine_mode mode;
9565 int n;
9567 if (! machine)
9568 return;
9570 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
9571 mode = (enum machine_mode) ((int) mode + 1))
9572 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
9573 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
9576 static void
9577 ix86_free_machine_status (p)
9578 struct function *p;
9580 free (p->machine);
9581 p->machine = NULL;
9584 /* Return a MEM corresponding to a stack slot with mode MODE.
9585 Allocate a new slot if necessary.
9587 The RTL for a function can have several slots available: N is
9588 which slot to use. */
9590 rtx
9591 assign_386_stack_local (mode, n)
9592 enum machine_mode mode;
9593 int n;
9595 if (n < 0 || n >= MAX_386_STACK_LOCALS)
9596 abort ();
9598 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9599 ix86_stack_locals[(int) mode][n]
9600 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9602 return ix86_stack_locals[(int) mode][n];
9605 /* Calculate the length of the memory address in the instruction
9606 encoding. Does not include the one-byte modrm, opcode, or prefix. */
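/* Examples (illustrative): "(%eax)" adds 0 bytes, "8(%ebp)" a 1-byte
   displacement, "foo(%ebx)" a 4-byte displacement, and an indexed
   address such as "8(%ebx,%ecx,2)" pays one extra byte for the SIB
   in addition to its displacement. */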
9608 static int
9609 memory_address_length (addr)
9610 rtx addr;
9612 struct ix86_address parts;
9613 rtx base, index, disp;
9614 int len;
9616 if (GET_CODE (addr) == PRE_DEC
9617 || GET_CODE (addr) == POST_INC
9618 || GET_CODE (addr) == PRE_MODIFY
9619 || GET_CODE (addr) == POST_MODIFY)
9620 return 0;
9622 if (! ix86_decompose_address (addr, &parts))
9623 abort ();
9625 base = parts.base;
9626 index = parts.index;
9627 disp = parts.disp;
9628 len = 0;
9630 /* Register Indirect. */
9631 if (base && !index && !disp)
9633 /* Special cases: ebp and esp need the two-byte modrm form. */
9634 if (addr == stack_pointer_rtx
9635 || addr == arg_pointer_rtx
9636 || addr == frame_pointer_rtx
9637 || addr == hard_frame_pointer_rtx)
9638 len = 1;
9641 /* Direct Addressing. */
9642 else if (disp && !base && !index)
9643 len = 4;
9645 else
9647 /* Find the length of the displacement constant. */
9648 if (disp)
9650 if (GET_CODE (disp) == CONST_INT
9651 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
9652 len = 1;
9653 else
9654 len = 4;
9657 /* An index requires the two-byte modrm form. */
9658 if (index)
9659 len += 1;
9662 return len;
9665 /* Compute the default value for the "length_immediate" attribute. When
9666 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
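/* E.g. "addl $12, %eax" qualifies for the sign-extended 8-bit form,
   so with SHORTFORM the immediate counts as 1 byte, while
   "addl $1000, %eax" needs a full 4-byte (MODE_SI) immediate. */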
9667 int
9668 ix86_attr_length_immediate_default (insn, shortform)
9669 rtx insn;
9670 int shortform;
9672 int len = 0;
9673 int i;
9674 extract_insn_cached (insn);
9675 for (i = recog_data.n_operands - 1; i >= 0; --i)
9676 if (CONSTANT_P (recog_data.operand[i]))
9678 if (len)
9679 abort ();
9680 if (shortform
9681 && GET_CODE (recog_data.operand[i]) == CONST_INT
9682 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
9683 len = 1;
9684 else
9686 switch (get_attr_mode (insn))
9688 case MODE_QI:
9689 len += 1;
9690 break;
9691 case MODE_HI:
9692 len += 2;
9693 break;
9694 case MODE_SI:
9695 len += 4;
9696 break;
9697 default:
9698 fatal_insn ("Unknown insn mode", insn);
9702 return len;
9704 /* Compute the default value for the "length_address" attribute. */
9705 int
9706 ix86_attr_length_address_default (insn)
9707 rtx insn;
9709 int i;
9710 extract_insn_cached (insn);
9711 for (i = recog_data.n_operands - 1; i >= 0; --i)
9712 if (GET_CODE (recog_data.operand[i]) == MEM)
9714 return memory_address_length (XEXP (recog_data.operand[i], 0));
9715 break;
9717 return 0;
9720 /* Return the maximum number of instructions a cpu can issue. */
9722 static int
9723 ix86_issue_rate ()
9725 switch (ix86_cpu)
9727 case PROCESSOR_PENTIUM:
9728 case PROCESSOR_K6:
9729 return 2;
9731 case PROCESSOR_PENTIUMPRO:
9732 case PROCESSOR_PENTIUM4:
9733 case PROCESSOR_ATHLON:
9734 return 3;
9736 default:
9737 return 1;
9741 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
9742 set by DEP_INSN and reads nothing else that DEP_INSN sets. */
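/* E.g. in "cmpl %eax, %ebx ; jne .L1" the branch depends on the
   compare only through the flags register; on Pentium such a pair
   can execute together, so ix86_adjust_cost gives it cost 0. */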
9744 static int
9745 ix86_flags_dependant (insn, dep_insn, insn_type)
9746 rtx insn, dep_insn;
9747 enum attr_type insn_type;
9749 rtx set, set2;
9751 /* Simplify the test for uninteresting insns. */
9752 if (insn_type != TYPE_SETCC
9753 && insn_type != TYPE_ICMOV
9754 && insn_type != TYPE_FCMOV
9755 && insn_type != TYPE_IBR)
9756 return 0;
9758 if ((set = single_set (dep_insn)) != 0)
9760 set = SET_DEST (set);
9761 set2 = NULL_RTX;
9763 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
9764 && XVECLEN (PATTERN (dep_insn), 0) == 2
9765 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
9766 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
9768 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
9769 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
9771 else
9772 return 0;
9774 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
9775 return 0;
9777 /* This test is true if the dependent insn reads the flags but
9778 not any other potentially set register. */
9779 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
9780 return 0;
9782 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
9783 return 0;
9785 return 1;
9788 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
9789 address with operands set by DEP_INSN. */
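/* E.g. on Pentium "addl $4, %esi ; movl (%esi), %eax" stalls for a
   cycle because the load's address depends on the just-written %esi
   (the classic address generation interlock). */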
9791 static int
9792 ix86_agi_dependant (insn, dep_insn, insn_type)
9793 rtx insn, dep_insn;
9794 enum attr_type insn_type;
9796 rtx addr;
9798 if (insn_type == TYPE_LEA
9799 && TARGET_PENTIUM)
9801 addr = PATTERN (insn);
9802 if (GET_CODE (addr) == SET)
9804 else if (GET_CODE (addr) == PARALLEL
9805 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
9806 addr = XVECEXP (addr, 0, 0);
9807 else
9808 abort ();
9809 addr = SET_SRC (addr);
9811 else
9813 int i;
9814 extract_insn_cached (insn);
9815 for (i = recog_data.n_operands - 1; i >= 0; --i)
9816 if (GET_CODE (recog_data.operand[i]) == MEM)
9818 addr = XEXP (recog_data.operand[i], 0);
9819 goto found;
9821 return 0;
9822 found:;
9825 return modified_in_p (addr, dep_insn);
9828 static int
9829 ix86_adjust_cost (insn, link, dep_insn, cost)
9830 rtx insn, link, dep_insn;
9831 int cost;
9833 enum attr_type insn_type, dep_insn_type;
9834 enum attr_memory memory, dep_memory;
9835 rtx set, set2;
9836 int dep_insn_code_number;
9838 /* Anti and output dependencies have zero cost on all CPUs. */
9839 if (REG_NOTE_KIND (link) != 0)
9840 return 0;
9842 dep_insn_code_number = recog_memoized (dep_insn);
9844 /* If we can't recognize the insns, we can't really do anything. */
9845 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
9846 return cost;
9848 insn_type = get_attr_type (insn);
9849 dep_insn_type = get_attr_type (dep_insn);
9851 switch (ix86_cpu)
9853 case PROCESSOR_PENTIUM:
9854 /* Address Generation Interlock adds a cycle of latency. */
9855 if (ix86_agi_dependant (insn, dep_insn, insn_type))
9856 cost += 1;
9858 /* ??? Compares pair with jump/setcc. */
9859 if (ix86_flags_dependant (insn, dep_insn, insn_type))
9860 cost = 0;
9862 /* Floating point stores require the value to be ready one cycle earlier. */
9863 if (insn_type == TYPE_FMOV
9864 && get_attr_memory (insn) == MEMORY_STORE
9865 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9866 cost += 1;
9867 break;
9869 case PROCESSOR_PENTIUMPRO:
9870 memory = get_attr_memory (insn);
9871 dep_memory = get_attr_memory (dep_insn);
9873 /* Since we can't represent delayed latencies of load+operation,
9874 increase the cost here for non-imov insns. */
9875 if (dep_insn_type != TYPE_IMOV
9876 && dep_insn_type != TYPE_FMOV
9877 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
9878 cost += 1;
9880 /* INT->FP conversion is expensive. */
9881 if (get_attr_fp_int_src (dep_insn))
9882 cost += 5;
9884 /* There is one cycle extra latency between an FP op and a store. */
9885 if (insn_type == TYPE_FMOV
9886 && (set = single_set (dep_insn)) != NULL_RTX
9887 && (set2 = single_set (insn)) != NULL_RTX
9888 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
9889 && GET_CODE (SET_DEST (set2)) == MEM)
9890 cost += 1;
9892 /* Show the ability of the reorder buffer to hide the latency of a load by
9893 executing it in parallel with the previous instruction in case the
9894 previous instruction is not needed to compute the address. */
9895 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
9896 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9898 /* Claim that moves take one cycle, as the core can issue one load
9899 at a time and the next load can start a cycle later. */
9900 if (dep_insn_type == TYPE_IMOV
9901 || dep_insn_type == TYPE_FMOV)
9902 cost = 1;
9903 else if (cost > 1)
9904 cost--;
9906 break;
9908 case PROCESSOR_K6:
9909 memory = get_attr_memory (insn);
9910 dep_memory = get_attr_memory (dep_insn);
9911 /* The esp dependency is resolved before the instruction is really
9912 finished. */
9913 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
9914 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
9915 return 1;
9917 /* Since we can't represent delayed latencies of load+operation,
9918 increase the cost here for non-imov insns. */
9919 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
9920 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
9922 /* INT->FP conversion is expensive. */
9923 if (get_attr_fp_int_src (dep_insn))
9924 cost += 5;
9926 /* Show the ability of the reorder buffer to hide the latency of a load by
9927 executing it in parallel with the previous instruction in case the
9928 previous instruction is not needed to compute the address. */
9929 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
9930 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9932 /* Claim that moves take one cycle, as the core can issue one load
9933 at a time and the next load can start a cycle later. */
9934 if (dep_insn_type == TYPE_IMOV
9935 || dep_insn_type == TYPE_FMOV)
9936 cost = 1;
9937 else if (cost > 2)
9938 cost -= 2;
9939 else
9940 cost = 1;
9942 break;
9944 case PROCESSOR_ATHLON:
9945 memory = get_attr_memory (insn);
9946 dep_memory = get_attr_memory (dep_insn);
9948 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
9950 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
9951 cost += 2;
9952 else
9953 cost += 3;
9955 /* Show the ability of the reorder buffer to hide the latency of a load by
9956 executing it in parallel with the previous instruction in case the
9957 previous instruction is not needed to compute the address. */
9958 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
9959 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9961 /* Claim that moves take one cycle, as the core can issue one load
9962 at a time and the next load can start a cycle later. */
9963 if (dep_insn_type == TYPE_IMOV
9964 || dep_insn_type == TYPE_FMOV)
9965 cost = 0;
9966 else if (cost >= 3)
9967 cost -= 3;
9968 else
9969 cost = 0;
9972 default:
9973 break;
9976 return cost;
9979 static union
9981 struct ppro_sched_data
9983 rtx decode[3];
9984 int issued_this_cycle;
9985 } ppro;
9986 } ix86_sched_data;
9988 static int
9989 ix86_safe_length (insn)
9990 rtx insn;
9992 if (recog_memoized (insn) >= 0)
9993 return get_attr_length (insn);
9994 else
9995 return 128;
9998 static int
9999 ix86_safe_length_prefix (insn)
10000 rtx insn;
10002 if (recog_memoized (insn) >= 0)
10003 return get_attr_length (insn);
10004 else
10005 return 0;
10008 static enum attr_memory
10009 ix86_safe_memory (insn)
10010 rtx insn;
10012 if (recog_memoized (insn) >= 0)
10013 return get_attr_memory (insn);
10014 else
10015 return MEMORY_UNKNOWN;
10018 static enum attr_pent_pair
10019 ix86_safe_pent_pair (insn)
10020 rtx insn;
10022 if (recog_memoized (insn) >= 0)
10023 return get_attr_pent_pair (insn);
10024 else
10025 return PENT_PAIR_NP;
10028 static enum attr_ppro_uops
10029 ix86_safe_ppro_uops (insn)
10030 rtx insn;
10032 if (recog_memoized (insn) >= 0)
10033 return get_attr_ppro_uops (insn);
10034 else
10035 return PPRO_UOPS_MANY;
10038 static void
10039 ix86_dump_ppro_packet (dump)
10040 FILE *dump;
10042 if (ix86_sched_data.ppro.decode[0])
10044 fprintf (dump, "PPRO packet: %d",
10045 INSN_UID (ix86_sched_data.ppro.decode[0]));
10046 if (ix86_sched_data.ppro.decode[1])
10047 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10048 if (ix86_sched_data.ppro.decode[2])
10049 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10050 fputc ('\n', dump);
10054 /* We're beginning a new block. Initialize data structures as necessary. */
10056 static void
10057 ix86_sched_init (dump, sched_verbose, veclen)
10058 FILE *dump ATTRIBUTE_UNUSED;
10059 int sched_verbose ATTRIBUTE_UNUSED;
10060 int veclen ATTRIBUTE_UNUSED;
10062 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10065 /* Shift INSN to SLOT, and shift everything else down. */
10067 static void
10068 ix86_reorder_insn (insnp, slot)
10069 rtx *insnp, *slot;
10071 if (insnp != slot)
10073 rtx insn = *insnp;
10074 do
10075 insnp[0] = insnp[1];
10076 while (++insnp != slot);
10077 *insnp = insn;
10081 /* Find an instruction with the given pairability that minimizes the number
10082 of cycles lost to the CPU waiting for both pipelines to finish before
10083 reading the next instructions. Also take care that the two instructions
10084 together cannot exceed 7 bytes. */
10086 static rtx *
10087 ix86_pent_find_pair (e_ready, ready, type, first)
10088 rtx *e_ready;
10089 rtx *ready;
10090 enum attr_pent_pair type;
10091 rtx first;
10093 int mincycles, cycles;
10094 enum attr_pent_pair tmp;
10095 enum attr_memory memory;
10096 rtx *insnp, *bestinsnp = NULL;
10098 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
10099 return NULL;
10101 memory = ix86_safe_memory (first);
10102 cycles = result_ready_cost (first);
10103 mincycles = INT_MAX;
10105 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
10106 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
10107 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
10109 enum attr_memory second_memory;
10110 int secondcycles, currentcycles;
10112 second_memory = ix86_safe_memory (*insnp);
10113 secondcycles = result_ready_cost (*insnp);
10114 currentcycles = abs (cycles - secondcycles);
10116 if (secondcycles >= 1 && cycles >= 1)
10118 /* Two read/modify/write instructions together take two
10119 cycles longer. */
10120 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
10121 currentcycles += 2;
10123 /* A read/modify/write instruction followed by a read/modify
10124 instruction takes one cycle longer. */
10125 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
10126 && tmp != PENT_PAIR_UV
10127 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
10128 currentcycles += 1;
10130 if (currentcycles < mincycles)
10131 bestinsnp = insnp, mincycles = currentcycles;
10134 return bestinsnp;
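/* Editorial sketch, not part of GCC: the pairing penalty computed by the
   loop above, restated as a self-contained function so the cost rules are
   easier to see.  The cycle counts, memory kinds and UV flags are
   assumptions supplied by the caller; in GCC they come from
   result_ready_cost and the insn attributes.  */

enum mem_kind { MK_NONE, MK_LOAD, MK_BOTH };

static int
pent_pair_penalty (cycles1, mem1, uv1, cycles2, mem2, uv2)
     int cycles1, uv1, cycles2, uv2;
     enum mem_kind mem1, mem2;
{
  /* The CPU waits for both pipes to finish before reading the next
     instructions, so the cycles lost are the latency difference.  */
  int penalty = cycles1 > cycles2 ? cycles1 - cycles2 : cycles2 - cycles1;

  if (cycles1 >= 1 && cycles2 >= 1)
    {
      /* Two read/modify/write instructions together take two cycles
         longer.  */
      if (mem1 == MK_BOTH && mem2 == MK_BOTH)
        penalty += 2;

      /* A read/modify/write followed by a read/modify takes one cycle
         longer, unless either insn can pair in both pipes (UV).  */
      if (mem1 == MK_BOTH && mem2 == MK_LOAD && !uv1 && !uv2)
        penalty += 1;
    }
  return penalty;
}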
10137 /* Subroutines of ix86_sched_reorder. */
10139 static void
10140 ix86_sched_reorder_pentium (ready, e_ready)
10141 rtx *ready;
10142 rtx *e_ready;
10144 enum attr_pent_pair pair1, pair2;
10145 rtx *insnp;
10147 /* This wouldn't be necessary if Haifa knew that static insn ordering
10148 matters for which pipe an insn is issued to. So we have to make
10149 some minor rearrangements. */
10151 pair1 = ix86_safe_pent_pair (*e_ready);
10153 /* If the first insn is non-pairable, let it be. */
10154 if (pair1 == PENT_PAIR_NP)
10155 return;
10157 pair2 = PENT_PAIR_NP;
10158 insnp = 0;
10160 /* If the first insn is UV or PV pairable, search for a PU
10161 insn to go with. */
10162 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
10164 insnp = ix86_pent_find_pair (e_ready-1, ready,
10165 PENT_PAIR_PU, *e_ready);
10166 if (insnp)
10167 pair2 = PENT_PAIR_PU;
10170 /* If the first insn is PU or UV pairable, search for a PV
10171 insn to go with. */
10172 if (pair2 == PENT_PAIR_NP
10173 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
10175 insnp = ix86_pent_find_pair (e_ready-1, ready,
10176 PENT_PAIR_PV, *e_ready);
10177 if (insnp)
10178 pair2 = PENT_PAIR_PV;
10181 /* If the first insn is pairable, search for a UV
10182 insn to go with. */
10183 if (pair2 == PENT_PAIR_NP)
10185 insnp = ix86_pent_find_pair (e_ready-1, ready,
10186 PENT_PAIR_UV, *e_ready);
10187 if (insnp)
10188 pair2 = PENT_PAIR_UV;
10191 if (pair2 == PENT_PAIR_NP)
10192 return;
10194 /* Found something! Decide if we need to swap the order. */
10195 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
10196 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
10197 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
10198 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
10199 ix86_reorder_insn (insnp, e_ready);
10200 else
10201 ix86_reorder_insn (insnp, e_ready - 1);
10204 static void
10205 ix86_sched_reorder_ppro (ready, e_ready)
10206 rtx *ready;
10207 rtx *e_ready;
10209 rtx decode[3];
10210 enum attr_ppro_uops cur_uops;
10211 int issued_this_cycle;
10212 rtx *insnp;
10213 int i;
10215 /* At this point .ppro.decode contains the state of the three
10216 decoders from last "cycle". That is, those insns that were
10217 actually independent. But here we're scheduling for the
10218 decoder, and we may find things that are decodable in the
10219 same cycle. */
10221 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
10222 issued_this_cycle = 0;
10224 insnp = e_ready;
10225 cur_uops = ix86_safe_ppro_uops (*insnp);
10227 /* If the decoders are empty and we have a complex insn at the
10228 head of the priority queue, let it issue without complaint. */
10229 if (decode[0] == NULL)
10231 if (cur_uops == PPRO_UOPS_MANY)
10233 decode[0] = *insnp;
10234 goto ppro_done;
10237 /* Otherwise, search for a 2-4 uop insn to issue. */
10238 while (cur_uops != PPRO_UOPS_FEW)
10240 if (insnp == ready)
10241 break;
10242 cur_uops = ix86_safe_ppro_uops (*--insnp);
10245 /* If so, move it to the head of the line. */
10246 if (cur_uops == PPRO_UOPS_FEW)
10247 ix86_reorder_insn (insnp, e_ready);
10249 /* Issue the head of the queue. */
10250 issued_this_cycle = 1;
10251 decode[0] = *e_ready--;
10254 /* Look for simple insns to fill in the other two slots. */
10255 for (i = 1; i < 3; ++i)
10256 if (decode[i] == NULL)
10258 if (ready >= e_ready)
10259 goto ppro_done;
10261 insnp = e_ready;
10262 cur_uops = ix86_safe_ppro_uops (*insnp);
10263 while (cur_uops != PPRO_UOPS_ONE)
10265 if (insnp == ready)
10266 break;
10267 cur_uops = ix86_safe_ppro_uops (*--insnp);
10270 /* Found one. Move it to the head of the queue and issue it. */
10271 if (cur_uops == PPRO_UOPS_ONE)
10273 ix86_reorder_insn (insnp, e_ready);
10274 decode[i] = *e_ready--;
10275 issued_this_cycle++;
10276 continue;
10279 /* ??? Didn't find one. Ideally, here we would do a lazy split
10280 of 2-uop insns, issue one and queue the other. */
10283 ppro_done:
10284 if (issued_this_cycle == 0)
10285 issued_this_cycle = 1;
10286 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
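/* Editorial sketch, not part of GCC: the decoder model the routine above
   schedules for.  The PPro front end follows the 4-1-1 rule: decoder 0
   accepts any insn, decoders 1 and 2 only single-uop insns.  The helper
   name is illustrative; the uop classes are the PPRO_UOPS_* attribute
   values used elsewhere in this file.  */

static int
ppro_decode_slot (uops, slot_busy)
     enum attr_ppro_uops uops;
     int slot_busy[3];
{
  /* Multi-uop insns can be decoded only by decoder 0.  */
  if (uops == PPRO_UOPS_MANY || uops == PPRO_UOPS_FEW)
    return slot_busy[0] ? -1 : 0;

  /* Single-uop insns go to whichever decoder is free.  */
  if (! slot_busy[0])
    return 0;
  if (! slot_busy[1])
    return 1;
  if (! slot_busy[2])
    return 2;
  return -1;
}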
10289 /* We are about to begin issuing insns for this clock cycle.
10290 Override the default sort algorithm to better slot instructions. */
10291 static int
10292 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
10293 FILE *dump ATTRIBUTE_UNUSED;
10294 int sched_verbose ATTRIBUTE_UNUSED;
10295 rtx *ready;
10296 int *n_readyp;
10297 int clock_var ATTRIBUTE_UNUSED;
10299 int n_ready = *n_readyp;
10300 rtx *e_ready = ready + n_ready - 1;
10302 if (n_ready < 2)
10303 goto out;
10305 switch (ix86_cpu)
10307 default:
10308 break;
10310 case PROCESSOR_PENTIUM:
10311 ix86_sched_reorder_pentium (ready, e_ready);
10312 break;
10314 case PROCESSOR_PENTIUMPRO:
10315 ix86_sched_reorder_ppro (ready, e_ready);
10316 break;
10319 out:
10320 return ix86_issue_rate ();
10323 /* We are about to issue INSN. Return the number of insns left on the
10324 ready queue that can be issued this cycle. */
10326 static int
10327 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
10328 FILE *dump;
10329 int sched_verbose;
10330 rtx insn;
10331 int can_issue_more;
10333 int i;
10334 switch (ix86_cpu)
10336 default:
10337 return can_issue_more - 1;
10339 case PROCESSOR_PENTIUMPRO:
10341 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
10343 if (uops == PPRO_UOPS_MANY)
10345 if (sched_verbose)
10346 ix86_dump_ppro_packet (dump);
10347 ix86_sched_data.ppro.decode[0] = insn;
10348 ix86_sched_data.ppro.decode[1] = NULL;
10349 ix86_sched_data.ppro.decode[2] = NULL;
10350 if (sched_verbose)
10351 ix86_dump_ppro_packet (dump);
10352 ix86_sched_data.ppro.decode[0] = NULL;
10354 else if (uops == PPRO_UOPS_FEW)
10356 if (sched_verbose)
10357 ix86_dump_ppro_packet (dump);
10358 ix86_sched_data.ppro.decode[0] = insn;
10359 ix86_sched_data.ppro.decode[1] = NULL;
10360 ix86_sched_data.ppro.decode[2] = NULL;
10362 else
10364 for (i = 0; i < 3; ++i)
10365 if (ix86_sched_data.ppro.decode[i] == NULL)
10367 ix86_sched_data.ppro.decode[i] = insn;
10368 break;
10370 if (i == 3)
10371 abort ();
10372 if (i == 2)
10374 if (sched_verbose)
10375 ix86_dump_ppro_packet (dump);
10376 ix86_sched_data.ppro.decode[0] = NULL;
10377 ix86_sched_data.ppro.decode[1] = NULL;
10378 ix86_sched_data.ppro.decode[2] = NULL;
10382 return --ix86_sched_data.ppro.issued_this_cycle;
10386 /* Walk through INSNS and look for MEM references whose address is DSTREG or
10387 SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
10388 appropriate. */
10390 void
10391 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
10392 rtx insns;
10393 rtx dstref, srcref, dstreg, srcreg;
10395 rtx insn;
10397 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
10398 if (INSN_P (insn))
10399 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
10400 dstreg, srcreg);
10403 /* Subroutine of above to actually do the updating by recursively walking
10404 the rtx. */
10406 static void
10407 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
10408 rtx x;
10409 rtx dstref, srcref, dstreg, srcreg;
10411 enum rtx_code code = GET_CODE (x);
10412 const char *format_ptr = GET_RTX_FORMAT (code);
10413 int i, j;
10415 if (code == MEM && XEXP (x, 0) == dstreg)
10416 MEM_COPY_ATTRIBUTES (x, dstref);
10417 else if (code == MEM && XEXP (x, 0) == srcreg)
10418 MEM_COPY_ATTRIBUTES (x, srcref);
10420 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
10422 if (*format_ptr == 'e')
10423 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
10424 dstreg, srcreg);
10425 else if (*format_ptr == 'E')
10426 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10427 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
10428 dstreg, srcreg);
10432 /* Compute the alignment given to a constant that is being placed in memory.
10433 EXP is the constant and ALIGN is the alignment that the object would
10434 ordinarily have.
10435 The value of this function is used instead of that alignment to align
10436 the object. */
10438 int
10439 ix86_constant_alignment (exp, align)
10440 tree exp;
10441 int align;
10443 if (TREE_CODE (exp) == REAL_CST)
10445 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
10446 return 64;
10447 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
10448 return 128;
10450 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
10451 && align < 256)
10452 return 256;
10454 return align;
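/* Editorial example, not part of GCC: the minimum alignments (in bits)
   the function above assigns to typical constants.

	constant			alignment
	double (DFmode REAL_CST)	64
	string of length >= 31		256, so block moves of the string
					can use wide aligned accesses
	anything else			ALIGN unchanged  */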
10457 /* Compute the alignment for a static variable.
10458 TYPE is the data type, and ALIGN is the alignment that
10459 the object would ordinarily have. The value of this function is used
10460 instead of that alignment to align the object. */
10462 int
10463 ix86_data_alignment (type, align)
10464 tree type;
10465 int align;
10467 if (AGGREGATE_TYPE_P (type)
10468 && TYPE_SIZE (type)
10469 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10470 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
10471 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
10472 return 256;
10474 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
10475 to a 16-byte boundary. */
10476 if (TARGET_64BIT)
10478 if (AGGREGATE_TYPE_P (type)
10479 && TYPE_SIZE (type)
10480 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10481 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
10482 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10483 return 128;
10486 if (TREE_CODE (type) == ARRAY_TYPE)
10488 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10489 return 64;
10490 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10491 return 128;
10493 else if (TREE_CODE (type) == COMPLEX_TYPE)
10496 if (TYPE_MODE (type) == DCmode && align < 64)
10497 return 64;
10498 if (TYPE_MODE (type) == XCmode && align < 128)
10499 return 128;
10501 else if ((TREE_CODE (type) == RECORD_TYPE
10502 || TREE_CODE (type) == UNION_TYPE
10503 || TREE_CODE (type) == QUAL_UNION_TYPE)
10504 && TYPE_FIELDS (type))
10506 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10507 return 64;
10508 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10509 return 128;
10511 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10512 || TREE_CODE (type) == INTEGER_TYPE)
10514 if (TYPE_MODE (type) == DFmode && align < 64)
10515 return 64;
10516 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10517 return 128;
10520 return align;
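/* Editorial example, not part of GCC: declarations and the minimum
   alignment (in bits) the rules above give them.  */

static double xs[8];		/* aggregate of 512 bits (>= 256),
				   so aligned to 256 */
static __complex__ double z;	/* DCmode COMPLEX_TYPE: at least 64 */
struct rec { double d; int i; };
static struct rec r;		/* first field has DFmode: at least 64 */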
10523 /* Compute the alignment for a local variable.
10524 TYPE is the data type, and ALIGN is the alignment that
10525 the object would ordinarily have. The value of this macro is used
10526 instead of that alignment to align the object. */
10528 int
10529 ix86_local_alignment (type, align)
10530 tree type;
10531 int align;
10533 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
10534 to a 16-byte boundary. */
10535 if (TARGET_64BIT)
10537 if (AGGREGATE_TYPE_P (type)
10538 && TYPE_SIZE (type)
10539 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10540 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
10541 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10542 return 128;
10544 if (TREE_CODE (type) == ARRAY_TYPE)
10546 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10547 return 64;
10548 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10549 return 128;
10551 else if (TREE_CODE (type) == COMPLEX_TYPE)
10553 if (TYPE_MODE (type) == DCmode && align < 64)
10554 return 64;
10555 if (TYPE_MODE (type) == XCmode && align < 128)
10556 return 128;
10558 else if ((TREE_CODE (type) == RECORD_TYPE
10559 || TREE_CODE (type) == UNION_TYPE
10560 || TREE_CODE (type) == QUAL_UNION_TYPE)
10561 && TYPE_FIELDS (type))
10563 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10564 return 64;
10565 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10566 return 128;
10568 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10569 || TREE_CODE (type) == INTEGER_TYPE)
10572 if (TYPE_MODE (type) == DFmode && align < 64)
10573 return 64;
10574 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10575 return 128;
10577 return align;
10580 /* Emit RTL insns to initialize the variable parts of a trampoline.
10581 FNADDR is an RTX for the address of the function's pure code.
10582 CXT is an RTX for the static chain value for the function. */
10583 void
10584 x86_initialize_trampoline (tramp, fnaddr, cxt)
10585 rtx tramp, fnaddr, cxt;
10587 if (!TARGET_64BIT)
10589 /* Compute offset from the end of the jmp to the target function. */
10590 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
10591 plus_constant (tramp, 10),
10592 NULL_RTX, 1, OPTAB_DIRECT);
10593 emit_move_insn (gen_rtx_MEM (QImode, tramp),
10594 GEN_INT (trunc_int_for_mode (0xb9, QImode)));
10595 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
10596 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
10597 GEN_INT (trunc_int_for_mode (0xe9, QImode)));
10598 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
10600 else
10602 int offset = 0;
10603 /* Try to load the address using the shorter movl instead of movabs.
10604 We may want to support movq for kernel mode, but the kernel does not
10605 use trampolines at the moment. */
10606 if (x86_64_zero_extended_value (fnaddr))
10608 fnaddr = copy_to_mode_reg (DImode, fnaddr);
10609 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10610 GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
10611 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
10612 gen_lowpart (SImode, fnaddr));
10613 offset += 6;
10615 else
10617 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10618 GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
10619 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10620 fnaddr);
10621 offset += 10;
10623 /* Load the static chain into r10 using movabs. */
10624 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10625 GEN_INT (trunc_int_for_mode (0xba49, HImode)));
10626 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10627 cxt);
10628 offset += 10;
10629 /* Jump to r11. */
10630 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10631 GEN_INT (trunc_int_for_mode (0xff49, HImode)));
10632 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
10633 GEN_INT (trunc_int_for_mode (0xe3, QImode)));
10634 offset += 3;
10635 if (offset > TRAMPOLINE_SIZE)
10636 abort ();
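/* Editorial sketch, not part of GCC: the ten bytes the !TARGET_64BIT
   branch above stores, written out by hand for little-endian x86.
   write_ia32_trampoline, store_le32 and their arguments are
   illustrative names, not GCC interfaces.  */

static void
store_le32 (p, x)
     unsigned char *p;
     unsigned int x;
{
  p[0] = x & 0xff;
  p[1] = (x >> 8) & 0xff;
  p[2] = (x >> 16) & 0xff;
  p[3] = (x >> 24) & 0xff;
}

static void
write_ia32_trampoline (tramp, fnaddr, cxt)
     unsigned char *tramp;
     unsigned int fnaddr, cxt;
{
  /* Offset from the end of the jmp (at tramp + 10) to the target.  */
  unsigned int disp = fnaddr - ((unsigned int) (long) tramp + 10);

  tramp[0] = 0xb9;		/* movl $CXT, %ecx */
  store_le32 (tramp + 1, cxt);
  tramp[5] = 0xe9;		/* jmp rel32 to FNADDR */
  store_le32 (tramp + 6, disp);
}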
10640 #define def_builtin(MASK, NAME, TYPE, CODE) \
10641 do { \
10642 if ((MASK) & target_flags) \
10643 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
10644 } while (0)
10646 struct builtin_description
10648 unsigned int mask;
10649 enum insn_code icode;
10650 const char * name;
10651 enum ix86_builtins code;
10652 enum rtx_code comparison;
10653 unsigned int flag;
10656 static struct builtin_description bdesc_comi[] =
10658 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
10659 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
10660 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
10661 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
10662 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
10663 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
10664 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
10665 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
10666 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
10667 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
10668 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
10669 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
10672 static struct builtin_description bdesc_2arg[] =
10674 /* SSE */
10675 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
10676 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
10677 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
10678 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
10679 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
10680 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
10681 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
10682 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
10684 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
10685 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
10686 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
10687 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
10688 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
10689 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
10690 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
10691 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
10692 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
10693 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
10694 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
10695 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
10696 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
10697 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
10698 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
10699 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
10700 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
10701 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
10702 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
10703 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
10704 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
10705 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
10706 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
10707 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
10709 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
10710 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
10711 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
10712 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
10714 { MASK_SSE, CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
10715 { MASK_SSE, CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
10716 { MASK_SSE, CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
10717 { MASK_SSE, CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
10719 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
10720 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
10721 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
10722 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
10723 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
10725 /* MMX */
10726 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
10727 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
10728 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
10729 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
10730 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
10731 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
10733 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
10734 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
10735 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
10736 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
10737 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
10738 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
10739 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
10740 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
10742 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
10743 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
10744 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
10746 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
10747 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
10748 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
10749 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
10751 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
10752 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
10754 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
10755 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
10756 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
10757 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
10758 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
10759 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
10761 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
10762 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
10763 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
10764 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
10766 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
10767 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
10768 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
10769 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
10770 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
10771 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
10773 /* Special. */
10774 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
10775 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
10776 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
10778 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
10779 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
10781 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
10782 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
10783 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
10784 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
10785 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
10786 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
10788 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
10789 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
10790 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
10791 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
10792 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
10793 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
10795 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
10796 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
10797 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
10798 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
10800 { MASK_SSE, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
10801 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
10805 static struct builtin_description bdesc_1arg[] =
10807 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
10808 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
10810 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
10811 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
10812 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
10814 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
10815 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
10816 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
10817 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
10821 void
10822 ix86_init_builtins ()
10824 if (TARGET_MMX)
10825 ix86_init_mmx_sse_builtins ();
10828 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
10829 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
10830 builtins. */
10831 void
10832 ix86_init_mmx_sse_builtins ()
10834 struct builtin_description * d;
10835 size_t i;
10836 tree endlink = void_list_node;
10838 tree pchar_type_node = build_pointer_type (char_type_node);
10839 tree pfloat_type_node = build_pointer_type (float_type_node);
10840 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
10841 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
10843 /* Comparisons. */
10844 tree int_ftype_v4sf_v4sf
10845 = build_function_type (integer_type_node,
10846 tree_cons (NULL_TREE, V4SF_type_node,
10847 tree_cons (NULL_TREE,
10848 V4SF_type_node,
10849 endlink)));
10850 tree v4si_ftype_v4sf_v4sf
10851 = build_function_type (V4SI_type_node,
10852 tree_cons (NULL_TREE, V4SF_type_node,
10853 tree_cons (NULL_TREE,
10854 V4SF_type_node,
10855 endlink)));
10856 /* MMX/SSE/integer conversions. */
10857 tree int_ftype_v4sf
10858 = build_function_type (integer_type_node,
10859 tree_cons (NULL_TREE, V4SF_type_node,
10860 endlink));
10861 tree int_ftype_v8qi
10862 = build_function_type (integer_type_node,
10863 tree_cons (NULL_TREE, V8QI_type_node,
10864 endlink));
10865 tree int_ftype_v2si
10866 = build_function_type (integer_type_node,
10867 tree_cons (NULL_TREE, V2SI_type_node,
10868 endlink));
10869 tree v2si_ftype_int
10870 = build_function_type (V2SI_type_node,
10871 tree_cons (NULL_TREE, integer_type_node,
10872 endlink));
10873 tree v4sf_ftype_v4sf_int
10874 = build_function_type (V4SF_type_node,
10875 tree_cons (NULL_TREE, V4SF_type_node,
10876 tree_cons (NULL_TREE, integer_type_node,
10877 endlink)));
10878 tree v4sf_ftype_v4sf_v2si
10879 = build_function_type (V4SF_type_node,
10880 tree_cons (NULL_TREE, V4SF_type_node,
10881 tree_cons (NULL_TREE, V2SI_type_node,
10882 endlink)));
10883 tree int_ftype_v4hi_int
10884 = build_function_type (integer_type_node,
10885 tree_cons (NULL_TREE, V4HI_type_node,
10886 tree_cons (NULL_TREE, integer_type_node,
10887 endlink)));
10888 tree v4hi_ftype_v4hi_int_int
10889 = build_function_type (V4HI_type_node,
10890 tree_cons (NULL_TREE, V4HI_type_node,
10891 tree_cons (NULL_TREE, integer_type_node,
10892 tree_cons (NULL_TREE,
10893 integer_type_node,
10894 endlink))));
10895 /* Miscellaneous. */
10896 tree v8qi_ftype_v4hi_v4hi
10897 = build_function_type (V8QI_type_node,
10898 tree_cons (NULL_TREE, V4HI_type_node,
10899 tree_cons (NULL_TREE, V4HI_type_node,
10900 endlink)));
10901 tree v4hi_ftype_v2si_v2si
10902 = build_function_type (V4HI_type_node,
10903 tree_cons (NULL_TREE, V2SI_type_node,
10904 tree_cons (NULL_TREE, V2SI_type_node,
10905 endlink)));
10906 tree v4sf_ftype_v4sf_v4sf_int
10907 = build_function_type (V4SF_type_node,
10908 tree_cons (NULL_TREE, V4SF_type_node,
10909 tree_cons (NULL_TREE, V4SF_type_node,
10910 tree_cons (NULL_TREE,
10911 integer_type_node,
10912 endlink))));
10913 tree v4hi_ftype_v8qi_v8qi
10914 = build_function_type (V4HI_type_node,
10915 tree_cons (NULL_TREE, V8QI_type_node,
10916 tree_cons (NULL_TREE, V8QI_type_node,
10917 endlink)));
10918 tree v2si_ftype_v4hi_v4hi
10919 = build_function_type (V2SI_type_node,
10920 tree_cons (NULL_TREE, V4HI_type_node,
10921 tree_cons (NULL_TREE, V4HI_type_node,
10922 endlink)));
10923 tree v4hi_ftype_v4hi_int
10924 = build_function_type (V4HI_type_node,
10925 tree_cons (NULL_TREE, V4HI_type_node,
10926 tree_cons (NULL_TREE, integer_type_node,
10927 endlink)));
10928 tree v4hi_ftype_v4hi_di
10929 = build_function_type (V4HI_type_node,
10930 tree_cons (NULL_TREE, V4HI_type_node,
10931 tree_cons (NULL_TREE,
10932 long_long_integer_type_node,
10933 endlink)));
10934 tree v2si_ftype_v2si_di
10935 = build_function_type (V2SI_type_node,
10936 tree_cons (NULL_TREE, V2SI_type_node,
10937 tree_cons (NULL_TREE,
10938 long_long_integer_type_node,
10939 endlink)));
10940 tree void_ftype_void
10941 = build_function_type (void_type_node, endlink);
10942 tree void_ftype_pchar_int
10943 = build_function_type (void_type_node,
10944 tree_cons (NULL_TREE, pchar_type_node,
10945 tree_cons (NULL_TREE, integer_type_node,
10946 endlink)));
10947 tree void_ftype_unsigned
10948 = build_function_type (void_type_node,
10949 tree_cons (NULL_TREE, unsigned_type_node,
10950 endlink));
10951 tree unsigned_ftype_void
10952 = build_function_type (unsigned_type_node, endlink);
10953 tree di_ftype_void
10954 = build_function_type (long_long_unsigned_type_node, endlink);
10955 tree ti_ftype_void
10956 = build_function_type (intTI_type_node, endlink);
10957 tree v2si_ftype_v4sf
10958 = build_function_type (V2SI_type_node,
10959 tree_cons (NULL_TREE, V4SF_type_node,
10960 endlink));
10961 /* Loads/stores. */
10962 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
10963 tree_cons (NULL_TREE, V8QI_type_node,
10964 tree_cons (NULL_TREE,
10965 pchar_type_node,
10966 endlink)));
10967 tree void_ftype_v8qi_v8qi_pchar
10968 = build_function_type (void_type_node, maskmovq_args);
10969 tree v4sf_ftype_pfloat
10970 = build_function_type (V4SF_type_node,
10971 tree_cons (NULL_TREE, pfloat_type_node,
10972 endlink));
10973 tree v4sf_ftype_float
10974 = build_function_type (V4SF_type_node,
10975 tree_cons (NULL_TREE, float_type_node,
10976 endlink));
10977 tree v4sf_ftype_float_float_float_float
10978 = build_function_type (V4SF_type_node,
10979 tree_cons (NULL_TREE, float_type_node,
10980 tree_cons (NULL_TREE, float_type_node,
10981 tree_cons (NULL_TREE,
10982 float_type_node,
10983 tree_cons (NULL_TREE,
10984 float_type_node,
10985 endlink)))));
10986 /* @@@ the type is bogus */
10987 tree v4sf_ftype_v4sf_pv2si
10988 = build_function_type (V4SF_type_node,
10989 tree_cons (NULL_TREE, V4SF_type_node,
10990 tree_cons (NULL_TREE, pv2si_type_node,
10991 endlink)));
10992 tree void_ftype_pv2si_v4sf
10993 = build_function_type (void_type_node,
10994 tree_cons (NULL_TREE, pv2si_type_node,
10995 tree_cons (NULL_TREE, V4SF_type_node,
10996 endlink)));
10997 tree void_ftype_pfloat_v4sf
10998 = build_function_type (void_type_node,
10999 tree_cons (NULL_TREE, pfloat_type_node,
11000 tree_cons (NULL_TREE, V4SF_type_node,
11001 endlink)));
11002 tree void_ftype_pdi_di
11003 = build_function_type (void_type_node,
11004 tree_cons (NULL_TREE, pdi_type_node,
11005 tree_cons (NULL_TREE,
11006 long_long_unsigned_type_node,
11007 endlink)));
11008 /* Normal vector unops. */
11009 tree v4sf_ftype_v4sf
11010 = build_function_type (V4SF_type_node,
11011 tree_cons (NULL_TREE, V4SF_type_node,
11012 endlink));
11014 /* Normal vector binops. */
11015 tree v4sf_ftype_v4sf_v4sf
11016 = build_function_type (V4SF_type_node,
11017 tree_cons (NULL_TREE, V4SF_type_node,
11018 tree_cons (NULL_TREE, V4SF_type_node,
11019 endlink)));
11020 tree v8qi_ftype_v8qi_v8qi
11021 = build_function_type (V8QI_type_node,
11022 tree_cons (NULL_TREE, V8QI_type_node,
11023 tree_cons (NULL_TREE, V8QI_type_node,
11024 endlink)));
11025 tree v4hi_ftype_v4hi_v4hi
11026 = build_function_type (V4HI_type_node,
11027 tree_cons (NULL_TREE, V4HI_type_node,
11028 tree_cons (NULL_TREE, V4HI_type_node,
11029 endlink)));
11030 tree v2si_ftype_v2si_v2si
11031 = build_function_type (V2SI_type_node,
11032 tree_cons (NULL_TREE, V2SI_type_node,
11033 tree_cons (NULL_TREE, V2SI_type_node,
11034 endlink)));
11035 tree ti_ftype_ti_ti
11036 = build_function_type (intTI_type_node,
11037 tree_cons (NULL_TREE, intTI_type_node,
11038 tree_cons (NULL_TREE, intTI_type_node,
11039 endlink)));
11040 tree di_ftype_di_di
11041 = build_function_type (long_long_unsigned_type_node,
11042 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11043 tree_cons (NULL_TREE,
11044 long_long_unsigned_type_node,
11045 endlink)));
11047 tree v2si_ftype_v2sf
11048 = build_function_type (V2SI_type_node,
11049 tree_cons (NULL_TREE, V2SF_type_node,
11050 endlink));
11051 tree v2sf_ftype_v2si
11052 = build_function_type (V2SF_type_node,
11053 tree_cons (NULL_TREE, V2SI_type_node,
11054 endlink));
11055 tree v2si_ftype_v2si
11056 = build_function_type (V2SI_type_node,
11057 tree_cons (NULL_TREE, V2SI_type_node,
11058 endlink));
11059 tree v2sf_ftype_v2sf
11060 = build_function_type (V2SF_type_node,
11061 tree_cons (NULL_TREE, V2SF_type_node,
11062 endlink));
11063 tree v2sf_ftype_v2sf_v2sf
11064 = build_function_type (V2SF_type_node,
11065 tree_cons (NULL_TREE, V2SF_type_node,
11066 tree_cons (NULL_TREE,
11067 V2SF_type_node,
11068 endlink)));
11069 tree v2si_ftype_v2sf_v2sf
11070 = build_function_type (V2SI_type_node,
11071 tree_cons (NULL_TREE, V2SF_type_node,
11072 tree_cons (NULL_TREE,
11073 V2SF_type_node,
11074 endlink)));
11076 tree void_ftype_pchar
11077 = build_function_type (void_type_node,
11078 tree_cons (NULL_TREE, pchar_type_node,
11079 endlink));
11081 /* Add all builtins that are more or less simple operations on two
11082 operands. */
11083 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
11085 /* Use one of the operands; the target can have a different mode for
11086 mask-generating compares. */
11087 enum machine_mode mode;
11088 tree type;
11090 if (d->name == 0)
11091 continue;
11092 mode = insn_data[d->icode].operand[1].mode;
11094 switch (mode)
11096 case V4SFmode:
11097 type = v4sf_ftype_v4sf_v4sf;
11098 break;
11099 case V8QImode:
11100 type = v8qi_ftype_v8qi_v8qi;
11101 break;
11102 case V4HImode:
11103 type = v4hi_ftype_v4hi_v4hi;
11104 break;
11105 case V2SImode:
11106 type = v2si_ftype_v2si_v2si;
11107 break;
11108 case TImode:
11109 type = ti_ftype_ti_ti;
11110 break;
11111 case DImode:
11112 type = di_ftype_di_di;
11113 break;
11115 default:
11116 abort ();
11119 /* Override for comparisons. */
11120 if (d->icode == CODE_FOR_maskcmpv4sf3
11121 || d->icode == CODE_FOR_maskncmpv4sf3
11122 || d->icode == CODE_FOR_vmmaskcmpv4sf3
11123 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11124 type = v4si_ftype_v4sf_v4sf;
11126 def_builtin (d->mask, d->name, type, d->code);
11129 /* Add the remaining MMX insns with somewhat more complicated types. */
11130 def_builtin (MASK_MMX, "__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
11131 def_builtin (MASK_MMX, "__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
11132 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
11133 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
11134 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
11135 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
11136 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
11137 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
11138 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
11140 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
11141 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
11142 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
11144 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
11145 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
11147 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
11148 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
11150 /* comi/ucomi insns. */
11151 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
11152 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
11154 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
11155 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
11156 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
11158 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
11159 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
11160 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
11161 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
11162 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
11163 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
11165 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
11166 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
11168 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
11170 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
11171 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
11172 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
11173 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
11174 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
11175 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
11177 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
11178 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
11179 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
11180 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
11182 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
11183 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
11184 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
11185 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
11187 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
11188 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
11190 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
11192 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
11193 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
11194 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
11195 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
11196 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
11197 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
11199 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
11201 /* Original 3DNow! */
11202 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
11203 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
11204 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
11205 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
11206 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
11207 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
11208 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
11209 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
11210 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
11211 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
11212 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
11213 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
11214 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
11215 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
11216 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
11217 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
11218 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
11219 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
11220 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
11221 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
11222 def_builtin (MASK_3DNOW, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar, IX86_BUILTIN_PREFETCH_3DNOW);
11223 def_builtin (MASK_3DNOW, "__builtin_ia32_prefetchw", void_ftype_pchar, IX86_BUILTIN_PREFETCHW);
11225 /* 3DNow! extension as used in the Athlon CPU. */
11226 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
11227 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
11228 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
11229 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
11230 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
11231 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
11233 /* Composite intrinsics. */
11234 def_builtin (MASK_SSE, "__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
11235 def_builtin (MASK_SSE, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
11236 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
11237 def_builtin (MASK_SSE, "__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
11238 def_builtin (MASK_SSE, "__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
11239 def_builtin (MASK_SSE, "__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
11240 def_builtin (MASK_SSE, "__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
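/* Editorial example, not part of GCC: user-level code exercising a few
   of the builtins registered above.  The vector type is declared with
   the mode attribute, as the intrinsic headers of this era do; compile
   with -msse so that MASK_SSE is set and the builtins are defined.  */

typedef int v4sf __attribute__ ((mode (V4SF)));

v4sf
clamp_squares (v4sf x, v4sf lo, v4sf hi)
{
  v4sf y = __builtin_ia32_mulps (x, x);	/* v4sf_ftype_v4sf_v4sf */
  y = __builtin_ia32_maxps (y, lo);	/* minps/maxps: same signature */
  return __builtin_ia32_minps (y, hi);
}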
11243 /* Errors in the source file can cause expand_expr to return const0_rtx
11244 where we expect a vector. To avoid crashing, use one of the vector
11245 clear instructions. */
11246 static rtx
11247 safe_vector_operand (x, mode)
11248 rtx x;
11249 enum machine_mode mode;
11251 if (x != const0_rtx)
11252 return x;
11253 x = gen_reg_rtx (mode);
11255 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
11256 emit_insn (gen_mmx_clrdi (mode == DImode ? x
11257 : gen_rtx_SUBREG (DImode, x, 0)));
11258 else
11259 emit_insn (gen_sse_clrti (mode == TImode ? x
11260 : gen_rtx_SUBREG (TImode, x, 0)));
11261 return x;
11264 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
11266 static rtx
11267 ix86_expand_binop_builtin (icode, arglist, target)
11268 enum insn_code icode;
11269 tree arglist;
11270 rtx target;
11272 rtx pat;
11273 tree arg0 = TREE_VALUE (arglist);
11274 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11275 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11276 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11277 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11278 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11279 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11281 if (VECTOR_MODE_P (mode0))
11282 op0 = safe_vector_operand (op0, mode0);
11283 if (VECTOR_MODE_P (mode1))
11284 op1 = safe_vector_operand (op1, mode1);
11286 if (! target
11287 || GET_MODE (target) != tmode
11288 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11289 target = gen_reg_rtx (tmode);
11291 /* In case the insn wants input operands in modes different from
11292 the result, abort. */
11293 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
11294 abort ();
11296 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11297 op0 = copy_to_mode_reg (mode0, op0);
11298 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11299 op1 = copy_to_mode_reg (mode1, op1);
11301 pat = GEN_FCN (icode) (target, op0, op1);
11302 if (! pat)
11303 return 0;
11304 emit_insn (pat);
11305 return target;
11308 /* Subroutine of ix86_expand_builtin to take care of stores. */
11310 static rtx
11311 ix86_expand_store_builtin (icode, arglist, shuffle)
11312 enum insn_code icode;
11313 tree arglist;
11314 int shuffle;
11316 rtx pat;
11317 tree arg0 = TREE_VALUE (arglist);
11318 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11319 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11320 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11321 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
11322 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
11324 if (VECTOR_MODE_P (mode1))
11325 op1 = safe_vector_operand (op1, mode1);
11327 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11328 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11329 op1 = copy_to_mode_reg (mode1, op1);
11330 if (shuffle >= 0)
11331 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
11332 pat = GEN_FCN (icode) (op0, op1);
11333 if (pat)
11334 emit_insn (pat);
11335 return 0;
11338 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
11340 static rtx
11341 ix86_expand_unop_builtin (icode, arglist, target, do_load)
11342 enum insn_code icode;
11343 tree arglist;
11344 rtx target;
11345 int do_load;
11347 rtx pat;
11348 tree arg0 = TREE_VALUE (arglist);
11349 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11350 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11351 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11353 if (! target
11354 || GET_MODE (target) != tmode
11355 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11356 target = gen_reg_rtx (tmode);
11357 if (do_load)
11358 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11359 else
11361 if (VECTOR_MODE_P (mode0))
11362 op0 = safe_vector_operand (op0, mode0);
11364 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11365 op0 = copy_to_mode_reg (mode0, op0);
11368 pat = GEN_FCN (icode) (target, op0);
11369 if (! pat)
11370 return 0;
11371 emit_insn (pat);
11372 return target;
11375 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
11376 sqrtss, rsqrtss, rcpss. */
11378 static rtx
11379 ix86_expand_unop1_builtin (icode, arglist, target)
11380 enum insn_code icode;
11381 tree arglist;
11382 rtx target;
11384 rtx pat;
11385 tree arg0 = TREE_VALUE (arglist);
11386 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11387 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11388 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11390 if (! target
11391 || GET_MODE (target) != tmode
11392 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11393 target = gen_reg_rtx (tmode);
11395 if (VECTOR_MODE_P (mode0))
11396 op0 = safe_vector_operand (op0, mode0);
11398 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11399 op0 = copy_to_mode_reg (mode0, op0);
11401 pat = GEN_FCN (icode) (target, op0, op0);
11402 if (! pat)
11403 return 0;
11404 emit_insn (pat);
11405 return target;
11408 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
11410 static rtx
11411 ix86_expand_sse_compare (d, arglist, target)
11412 struct builtin_description *d;
11413 tree arglist;
11414 rtx target;
11416 rtx pat;
11417 tree arg0 = TREE_VALUE (arglist);
11418 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11419 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11420 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11421 rtx op2;
11422 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
11423 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
11424 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
11425 enum rtx_code comparison = d->comparison;
11427 if (VECTOR_MODE_P (mode0))
11428 op0 = safe_vector_operand (op0, mode0);
11429 if (VECTOR_MODE_P (mode1))
11430 op1 = safe_vector_operand (op1, mode1);
11432 /* Swap operands if we have a comparison that isn't available in
11433 hardware. */
11434 if (d->flag)
11436 rtx tmp = gen_reg_rtx (mode1);
11437 emit_move_insn (tmp, op1);
11438 op1 = op0;
11439 op0 = tmp;
11442 if (! target
11443 || GET_MODE (target) != tmode
11444 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
11445 target = gen_reg_rtx (tmode);
11447 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
11448 op0 = copy_to_mode_reg (mode0, op0);
11449 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
11450 op1 = copy_to_mode_reg (mode1, op1);
11452 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11453 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
11454 if (! pat)
11455 return 0;
11456 emit_insn (pat);
11457 return target;
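/* Editorial note, not part of GCC: SSE hardware implements only the
   EQ/LT/LE comparisons and their negations, so the d->flag path above
   realizes GT and GE by swapping the operands.  For example,
   __builtin_ia32_cmpgtps (a, b) is emitted as CMPLTPS on (b, a), which
   is why the bdesc_2arg entries for cmpgt/cmpge list LT/LE with the
   flag field set to 1.  */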
11460 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
11462 static rtx
11463 ix86_expand_sse_comi (d, arglist, target)
11464 struct builtin_description *d;
11465 tree arglist;
11466 rtx target;
11468 rtx pat;
11469 tree arg0 = TREE_VALUE (arglist);
11470 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11471 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11472 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11473 rtx op2;
11474 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
11475 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
11476 enum rtx_code comparison = d->comparison;
11478 if (VECTOR_MODE_P (mode0))
11479 op0 = safe_vector_operand (op0, mode0);
11480 if (VECTOR_MODE_P (mode1))
11481 op1 = safe_vector_operand (op1, mode1);
11483 /* Swap operands if we have a comparison that isn't available in
11484 hardware. */
11485 if (d->flag)
11487 rtx tmp = op1;
11488 op1 = op0;
11489 op0 = tmp;
11492 target = gen_reg_rtx (SImode);
11493 emit_move_insn (target, const0_rtx);
11494 target = gen_rtx_SUBREG (QImode, target, 0);
11496 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
11497 op0 = copy_to_mode_reg (mode0, op0);
11498 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
11499 op1 = copy_to_mode_reg (mode1, op1);
11501 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11502 pat = GEN_FCN (d->icode) (op0, op1, op2);
11503 if (! pat)
11504 return 0;
11505 emit_insn (pat);
11506 emit_insn (gen_setcc_2 (target, op2));
11508 return target;
11511 /* Expand an expression EXP that calls a built-in function,
11512 with result going to TARGET if that's convenient
11513 (and in mode MODE if that's convenient).
11514 SUBTARGET may be used as the target for computing one of EXP's operands.
11515 IGNORE is nonzero if the value is to be ignored. */
11517 rtx
11518 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
11519 tree exp;
11520 rtx target;
11521 rtx subtarget ATTRIBUTE_UNUSED;
11522 enum machine_mode mode ATTRIBUTE_UNUSED;
11523 int ignore ATTRIBUTE_UNUSED;
11525 struct builtin_description *d;
11526 size_t i;
11527 enum insn_code icode;
11528 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
11529 tree arglist = TREE_OPERAND (exp, 1);
11530 tree arg0, arg1, arg2, arg3;
11531 rtx op0, op1, op2, pat;
11532 enum machine_mode tmode, mode0, mode1, mode2;
11533 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11535 switch (fcode)
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_M_FROM_INT:
      target = gen_reg_rtx (DImode);
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
      return target;

    case IX86_BUILTIN_M_TO_INT:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      op0 = copy_to_mode_reg (DImode, op0);
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
      return target;

    case IX86_BUILTIN_PEXTRW:
      icode = CODE_FOR_mmx_pextrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        {
          /* @@@ better error message */
          error ("selector must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
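
/* Editorial sketch, not part of the original sources: the operand[2]
   predicate above only accepts constants, because pextrw encodes its
   selector in the instruction itself.  Assuming the usual
   <xmmintrin.h> wrapper:

       int third_word (__m64 v)
       {
         return _mm_extract_pi16 (v, 2);
       }

   compiles, while passing a run-time variable as the selector reaches
   the error () call above instead of generating code.  */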

    case IX86_BUILTIN_PINSRW:
      icode = CODE_FOR_mmx_pinsrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("selector must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
      icode = CODE_FOR_mmx_maskmovq;
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;
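
/* Editorial sketch, not part of the original sources: maskmovq stores
   only the bytes of DATA whose corresponding MASK byte has its high bit
   set, through the given pointer; the reordering above maps the
   builtin's (data, mask, address) arguments onto the insn's
   (address, data, mask) operands.  Assuming the usual wrapper:

       void store_selected (__m64 data, __m64 mask, char *p)
       {
         _mm_maskmove_si64 (data, mask, p);
       }
*/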

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
      icode = (fcode == IX86_BUILTIN_LOADHPS
               ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS
               ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);
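
/* Editorial sketch, not part of the original sources: since LDMXCSR and
   STMXCSR only move SImode values through a stack temporary, a
   read-modify-write of the control/status register is simply the two
   builtins back to back.  Assuming the usual <xmmintrin.h> wrappers:

       _mm_setcsr (_mm_getcsr () | 0x8000);

   sets the flush-to-zero bit while preserving the rest of MXCSR.  */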

    case IX86_BUILTIN_PREFETCH:
      icode = CODE_FOR_prefetch;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
        {
          /* @@@ better error message */
          error ("selector must be an immediate");
          return const0_rtx;
        }

      op0 = copy_to_mode_reg (Pmode, op0);
      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_SHUFPS:
      icode = CODE_FOR_sse_shufps;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
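
/* Editorial sketch, not part of the original sources: like the pextrw
   selector, the shufps mask must be a compile-time constant; each pair
   of mask bits picks the source element for one result lane.  Assuming
   the usual <xmmintrin.h> wrappers:

       __m128 reversed = _mm_shuffle_ps (a, a, _MM_SHUFFLE (0, 1, 2, 3));

   uses mask 0x1b, the same value the LOADRPS/STORERPS composites below
   pass to gen_sse_shufps to reverse a vector.  */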

    case IX86_BUILTIN_PSHUFW:
      icode = CODE_FOR_mmx_pshufw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[2].mode;
      mode1 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[2].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PREFETCH_3DNOW:
      icode = CODE_FOR_prefetch_3dnow;
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
      if (! pat)
        return NULL_RTX;
      emit_insn (pat);
      return NULL_RTX;

    case IX86_BUILTIN_PREFETCHW:
      icode = CODE_FOR_prefetchw;
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
      if (! pat)
        return NULL_RTX;
      emit_insn (pat);
      return NULL_RTX;

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    /* Composite intrinsics.  */
    case IX86_BUILTIN_SETPS1:
      target = assign_386_stack_local (SFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, SFmode, 0),
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_loadss (op0, adjust_address (target, V4SFmode, 0)));
      emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
      return op0;
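
/* Editorial sketch, not part of the original sources: the SETPS1
   expansion above is the classic splat idiom - spill the scalar, movss
   it into element 0, then shufps with mask 0 so every result lane
   selects element 0.  It is what a wrapper like

       __m128 four_copies = _mm_set_ps1 (x);

   reduces to.  */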

    case IX86_BUILTIN_SETPS:
      target = assign_386_stack_local (V4SFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
      emit_move_insn (adjust_address (target, SFmode, 0),
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, SFmode, 4),
                      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, SFmode, 8),
                      expand_expr (arg2, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, SFmode, 12),
                      expand_expr (arg3, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_movaps (op0, target));
      return op0;

    case IX86_BUILTIN_CLRPS:
      target = gen_reg_rtx (TImode);
      emit_insn (gen_sse_clrti (target));
      return target;

    case IX86_BUILTIN_LOADRPS:
      target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
                                         gen_reg_rtx (V4SFmode), 1);
      emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
      return target;

    case IX86_BUILTIN_LOADPS1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
                                         gen_reg_rtx (V4SFmode), 1);
      emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPS1:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
    case IX86_BUILTIN_STORERPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      {
        /* Compares are treated specially.  */
        if (d->icode == CODE_FOR_maskcmpv4sf3
            || d->icode == CODE_FOR_vmmaskcmpv4sf3
            || d->icode == CODE_FOR_maskncmpv4sf3
            || d->icode == CODE_FOR_vmmaskncmpv4sf3)
          return ix86_expand_sse_compare (d, arglist, target);

        return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}
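
/* Editorial sketch, not part of the original sources: builtins not
   special-cased in the switch above are table driven.  A plain
   two-operand builtin such as __builtin_ia32_addps is looked up in
   bdesc_2arg by its IX86_BUILTIN_* code, via an entry shaped roughly
   like

       { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps",
         IX86_BUILTIN_ADDPS, 0, 0 },

   and handed to ix86_expand_binop_builtin; only the four mask-compare
   icodes are diverted to ix86_expand_sse_compare.  */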

/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (
            gen_rtx_SET (VOIDmode,
                         gen_rtx_MEM (DImode,
                                      gen_rtx_PRE_DEC (DImode,
                                                       stack_pointer_rtx)),
                         operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_di (&operand, 1, operands, operands + 1);
            emit_insn (
              gen_rtx_SET (VOIDmode,
                           gen_rtx_MEM (SImode,
                                        gen_rtx_PRE_DEC (Pmode,
                                                         stack_pointer_rtx)),
                           operands[1]));
            emit_insn (
              gen_rtx_SET (VOIDmode,
                           gen_rtx_MEM (SImode,
                                        gen_rtx_PRE_DEC (Pmode,
                                                         stack_pointer_rtx)),
                           operands[0]));
          }
          break;
        case HImode:
          /* It is better to store HImodes as SImodes.  */
          if (!TARGET_PARTIAL_REG_STALL)
            operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (
            gen_rtx_SET (VOIDmode,
                         gen_rtx_MEM (GET_MODE (operand),
                                      gen_rtx_PRE_DEC (SImode,
                                                       stack_pointer_rtx)),
                         operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
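
/* Editorial sketch, not part of the original sources, of the intended
   pairing with ix86_free_from_memory below, for a post-reload splitter
   that needs OP as a memory operand:

       rtx mem = ix86_force_to_memory (GET_MODE (op), op);
       ... emit insns that use mem ...
       ix86_free_from_memory (GET_MODE (op));

   On 64-bit targets with a red zone the slot lives below the stack
   pointer and nothing has to be deallocated; otherwise the pushes above
   are undone by the matching ix86_free_from_memory call.  */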

/* Free operand from the memory.  */
void
ix86_free_from_memory (mode)
     enum machine_mode mode;
{
  if (!TARGET_64BIT || !TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
        size = 2;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be
         converted to a pop or add instruction if registers are
         available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (x, class)
     rtx x;
     enum reg_class class;
{
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
        return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
        {
          /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
          if (MAYBE_SSE_CLASS_P (class))
            return (reg_class_subset_p (class, GENERAL_REGS)
                    ? GENERAL_REGS : FLOAT_REGS);
          else
            return class;
        }
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
        return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
        return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}

/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We
   avoid this by never combining those units in a single alternative in
   the machine description.  Ensure that this constraint holds to avoid
   unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so
   do not enforce these sanity checks.  */
int
ix86_secondary_memory_needed (class1, class2, mode, strict)
     enum reg_class class1, class2;
     enum machine_mode mode;
     int strict;
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
        abort ();
      else
        return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
          || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
              && (mode) != SImode)
          || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
              && (mode) != SImode));
}
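
/* Editorial sketch, not part of the original sources: with the checks
   above,

       ix86_secondary_memory_needed (FLOAT_REGS, SSE_REGS, DFmode, 1)

   is nonzero (x87 and SSE values can only meet through memory), while
   an SImode move between GENERAL_REGS and SSE_REGS or MMX_REGS is
   exempt because a 32-bit value can travel directly via movd.  */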

/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same
   as TO; on some machines it is expensive to move between registers if
   they are not general registers.  */
int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute the cost of the store
     followed by a load.  When copying from a general-purpose register
     we may emit multiple stores followed by a single load, causing a
     memory-size-mismatch stall.  Count this as an arbitrarily high cost
     of 20.  */
  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int add_cost = 0;
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
        add_cost = 20;
      return (MEMORY_MOVE_COST (mode, class1, 0)
              + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
    }
  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
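
/* Editorial sketch, not part of the original sources, of the arithmetic
   above: a DFmode copy between FLOAT_REGS and SSE_REGS needs secondary
   memory, so its reported cost is

       MEMORY_MOVE_COST (DFmode, FLOAT_REGS, 0)
         + MEMORY_MOVE_COST (DFmode, SSE_REGS, 1)

   plus the flat 20 when CLASS1 spans more hard registers than CLASS2
   and the mismatched store/load widths would stall.  */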

/* Return 1 if hard register REGNO can hold a value of machine-mode
   MODE.  */
int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* Only the flags register can hold CCmode values, and it can hold
     nothing else.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return VALID_SSE_REG_MODE (mode);
  if (MMX_REGNO_P (regno))
    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
  /* We handle both integer and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care with QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also the increased moving costs of QImode registers in non
   Q_REGS classes.  */
int
ix86_memory_move_cost (mode, class, in)
     enum machine_mode mode;
     enum reg_class class;
     int in;
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
        {
        case SFmode:
          index = 0;
          break;
        case DFmode:
          index = 1;
          break;
        case XFmode:
        case TFmode:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        case 16:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
        return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
                : ix86_cost->movzbl_load);
      else
        return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
                : ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as
         XFmode.  */
      if (mode == TFmode)
        mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
              * (int) GET_MODE_SIZE (mode) / 4);
    }
}
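
/* Editorial sketch, not part of the original sources, of the integer
   path above: a DImode value in GENERAL_REGS falls through to the
   default case, so its load cost is int_load[2] * 8 / 4, twice the
   SImode cost, matching the two 32-bit moves actually needed.  */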

#ifdef DO_GLOBAL_CTORS_BODY
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif

#if defined(TARGET_ELF) && defined(TARGET_COFF)
static void
sco_asm_named_section (name, flags)
     const char *name;
     unsigned int flags;
{
  if (TARGET_ELF)
    default_elf_asm_named_section (name, flags);
  else
    default_coff_asm_named_section (name, flags);
}

static void
sco_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority;
{
  if (TARGET_ELF)
    default_named_section_asm_out_constructor (symbol, priority);
  else
    ix86_svr3_asm_out_constructor (symbol, priority);
}
#endif