/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
   Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {  /* costs for tuning for size */
  2,            /* cost of an add instruction */
  3,            /* cost of a lea instruction */
  2,            /* variable shift costs */
  3,            /* constant shift costs */
  3,            /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  3,            /* cost of a divide/mod */
  0,            /* "large" insn */
  2,            /* MOVE_RATIO */
  2,            /* cost for loading QImode using movzbl */
  {2, 2, 2},        /* cost of loading integer registers
                       in QImode, HImode and SImode.
                       Relative to reg-reg move (2).  */
  {2, 2, 2},        /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {2, 2, 2},        /* cost of loading fp registers
                       in SFmode, DFmode and XFmode */
  {2, 2, 2},        /* cost of storing fp registers
                       in SFmode, DFmode and XFmode */
  3,            /* cost of moving MMX register */
  {3, 3},        /* cost of loading MMX registers
                      in SImode and DImode */
  {3, 3},        /* cost of storing MMX registers
                      in SImode and DImode */
  3,            /* cost of moving SSE register */
  {3, 3, 3},        /* cost of loading SSE registers
                       in SImode, DImode and TImode */
  {3, 3, 3},        /* cost of storing SSE registers
                       in SImode, DImode and TImode */
  3,            /* MMX or SSE register to integer */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {  /* 386 specific costs */
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  3,            /* variable shift costs */
  2,            /* constant shift costs */
  6,            /* cost of starting a multiply */
  1,            /* cost of multiply per each bit set */
  23,           /* cost of a divide/mod */
  15,           /* "large" insn */
  3,            /* MOVE_RATIO */
  4,            /* cost for loading QImode using movzbl */
  {2, 4, 2},        /* cost of loading integer registers
                       in QImode, HImode and SImode.
                       Relative to reg-reg move (2).  */
  {2, 4, 2},        /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {8, 8, 8},        /* cost of loading fp registers
                       in SFmode, DFmode and XFmode */
  {8, 8, 8},        /* cost of storing fp registers
                       in SFmode, DFmode and XFmode */
  2,            /* cost of moving MMX register */
  {4, 8},        /* cost of loading MMX registers
                      in SImode and DImode */
  {4, 8},        /* cost of storing MMX registers
                      in SImode and DImode */
  2,            /* cost of moving SSE register */
  {4, 8, 16},        /* cost of loading SSE registers
                        in SImode, DImode and TImode */
  {4, 8, 16},        /* cost of storing SSE registers
                        in SImode, DImode and TImode */
  3,            /* MMX or SSE register to integer */
};
static const
struct processor_costs i486_cost = {  /* 486 specific costs */
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  3,            /* variable shift costs */
  2,            /* constant shift costs */
  12,           /* cost of starting a multiply */
  1,            /* cost of multiply per each bit set */
  40,           /* cost of a divide/mod */
  15,           /* "large" insn */
  3,            /* MOVE_RATIO */
  4,            /* cost for loading QImode using movzbl */
  {2, 4, 2},        /* cost of loading integer registers
                       in QImode, HImode and SImode.
                       Relative to reg-reg move (2).  */
  {2, 4, 2},        /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {8, 8, 8},        /* cost of loading fp registers
                       in SFmode, DFmode and XFmode */
  {8, 8, 8},        /* cost of storing fp registers
                       in SFmode, DFmode and XFmode */
  2,            /* cost of moving MMX register */
  {4, 8},        /* cost of loading MMX registers
                      in SImode and DImode */
  {4, 8},        /* cost of storing MMX registers
                      in SImode and DImode */
  2,            /* cost of moving SSE register */
  {4, 8, 16},        /* cost of loading SSE registers
                        in SImode, DImode and TImode */
  {4, 8, 16},        /* cost of storing SSE registers
                        in SImode, DImode and TImode */
  3             /* MMX or SSE register to integer */
};
static const
struct processor_costs pentium_cost = {
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  4,            /* variable shift costs */
  1,            /* constant shift costs */
  11,           /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  25,           /* cost of a divide/mod */
  8,            /* "large" insn */
  6,            /* MOVE_RATIO */
  6,            /* cost for loading QImode using movzbl */
  {2, 4, 2},        /* cost of loading integer registers
                       in QImode, HImode and SImode.
                       Relative to reg-reg move (2).  */
  {2, 4, 2},        /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {2, 2, 6},        /* cost of loading fp registers
                       in SFmode, DFmode and XFmode */
  {4, 4, 6},        /* cost of storing fp registers
                       in SFmode, DFmode and XFmode */
  8,            /* cost of moving MMX register */
  {8, 8},        /* cost of loading MMX registers
                      in SImode and DImode */
  {8, 8},        /* cost of storing MMX registers
                      in SImode and DImode */
  2,            /* cost of moving SSE register */
  {4, 8, 16},        /* cost of loading SSE registers
                        in SImode, DImode and TImode */
  {4, 8, 16},        /* cost of storing SSE registers
                        in SImode, DImode and TImode */
  3             /* MMX or SSE register to integer */
};
static const
struct processor_costs pentiumpro_cost = {
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  1,            /* variable shift costs */
  1,            /* constant shift costs */
  4,            /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  17,           /* cost of a divide/mod */
  8,            /* "large" insn */
  6,            /* MOVE_RATIO */
  2,            /* cost for loading QImode using movzbl */
  {4, 4, 4},        /* cost of loading integer registers
                       in QImode, HImode and SImode.
                       Relative to reg-reg move (2).  */
  {2, 2, 2},        /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {2, 2, 6},        /* cost of loading fp registers
                       in SFmode, DFmode and XFmode */
  {4, 4, 6},        /* cost of storing fp registers
                       in SFmode, DFmode and XFmode */
  2,            /* cost of moving MMX register */
  {2, 2},        /* cost of loading MMX registers
                      in SImode and DImode */
  {2, 2},        /* cost of storing MMX registers
                      in SImode and DImode */
  2,            /* cost of moving SSE register */
  {2, 2, 8},        /* cost of loading SSE registers
                       in SImode, DImode and TImode */
  {2, 2, 8},        /* cost of storing SSE registers
                       in SImode, DImode and TImode */
  3             /* MMX or SSE register to integer */
};
static const
struct processor_costs k6_cost = {
  1,            /* cost of an add instruction */
  2,            /* cost of a lea instruction */
  1,            /* variable shift costs */
  1,            /* constant shift costs */
  3,            /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  18,           /* cost of a divide/mod */
  8,            /* "large" insn */
  4,            /* MOVE_RATIO */
  3,            /* cost for loading QImode using movzbl */
  {4, 5, 4},        /* cost of loading integer registers
                       in QImode, HImode and SImode.
                       Relative to reg-reg move (2).  */
  {2, 3, 2},        /* cost of storing integer registers */
  4,            /* cost of reg,reg fld/fst */
  {6, 6, 6},        /* cost of loading fp registers
                       in SFmode, DFmode and XFmode */
  {4, 4, 4},        /* cost of storing fp registers
                       in SFmode, DFmode and XFmode */
  2,            /* cost of moving MMX register */
  {2, 2},        /* cost of loading MMX registers
                      in SImode and DImode */
  {2, 2},        /* cost of storing MMX registers
                      in SImode and DImode */
  2,            /* cost of moving SSE register */
  {2, 2, 8},        /* cost of loading SSE registers
                       in SImode, DImode and TImode */
  {2, 2, 8},        /* cost of storing SSE registers
                       in SImode, DImode and TImode */
  6             /* MMX or SSE register to integer */
};
static const
struct processor_costs athlon_cost = {
  1,            /* cost of an add instruction */
  2,            /* cost of a lea instruction */
  1,            /* variable shift costs */
  1,            /* constant shift costs */
  5,            /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  42,           /* cost of a divide/mod */
  8,            /* "large" insn */
  9,            /* MOVE_RATIO */
  4,            /* cost for loading QImode using movzbl */
  {4, 5, 4},        /* cost of loading integer registers
                       in QImode, HImode and SImode.
                       Relative to reg-reg move (2).  */
  {2, 3, 2},        /* cost of storing integer registers */
  4,            /* cost of reg,reg fld/fst */
  {6, 6, 20},        /* cost of loading fp registers
                        in SFmode, DFmode and XFmode */
  {4, 4, 16},        /* cost of storing fp registers
                        in SFmode, DFmode and XFmode */
  2,            /* cost of moving MMX register */
  {2, 2},        /* cost of loading MMX registers
                      in SImode and DImode */
  {2, 2},        /* cost of storing MMX registers
                      in SImode and DImode */
  2,            /* cost of moving SSE register */
  {2, 2, 8},        /* cost of loading SSE registers
                       in SImode, DImode and TImode */
  {2, 2, 8},        /* cost of storing SSE registers
                       in SImode, DImode and TImode */
  6             /* MMX or SSE register to integer */
};
static const
struct processor_costs pentium4_cost = {
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  8,            /* variable shift costs */
  8,            /* constant shift costs */
  30,           /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  112,          /* cost of a divide/mod */
  16,           /* "large" insn */
  6,            /* MOVE_RATIO */
  2,            /* cost for loading QImode using movzbl */
  {4, 5, 4},        /* cost of loading integer registers
                       in QImode, HImode and SImode.
                       Relative to reg-reg move (2).  */
  {2, 3, 2},        /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {2, 2, 6},        /* cost of loading fp registers
                       in SFmode, DFmode and XFmode */
  {4, 4, 6},        /* cost of storing fp registers
                       in SFmode, DFmode and XFmode */
  2,            /* cost of moving MMX register */
  {2, 2},        /* cost of loading MMX registers
                      in SImode and DImode */
  {2, 2},        /* cost of storing MMX registers
                      in SImode and DImode */
  12,           /* cost of moving SSE register */
  {12, 12, 12},        /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {2, 2, 8},        /* cost of storing SSE registers
                       in SImode, DImode and TImode */
  10,           /* MMX or SSE register to integer */
};

const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_3dnow_a = m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
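
/* A note on how these masks are consumed (illustrative; the TARGET_*
   tuning macros live in i386.h, not here): each macro tests the bit
   for the CPU currently tuned for.  For instance, assuming the usual
   definition of CPUMASK as (1 << ix86_cpu),

     #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)

   evaluates to nonzero exactly when tuning for a CPU on which using
   the `leave' instruction in the epilogue is a win.  */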
/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))

static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,          /* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,  /* fp regs */
  -1, -1, -1, -1, -1,              /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,  /* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,  /* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,  /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,  /* extended SSE registers */
};

static int x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
                                                1 /*RDX*/, 2 /*RCX*/,
                                                FIRST_REX_INT_REG /*R8 */,
                                                FIRST_REX_INT_REG + 1 /*R9 */};
static int x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDX*/, 5, 4};
/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,          /* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,  /* fp regs */
  -1, -1, -1, -1, -1,              /* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,  /* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,  /* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,    /* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,  /* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)

   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)

   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.

   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to
   understand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)

   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.

   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,          /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,  /* fp regs */
  -1, 9, -1, -1, -1,               /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,  /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,  /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,  /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,  /* extended SSE registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
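
/* Illustrative arithmetic (the concrete values are assumptions about
   the x86-64 target, not checked here): with REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8, the save area is
   6*8 + 8*16 = 176 bytes -- 48 bytes for the six integer argument
   registers followed by 128 bytes for the eight SSE argument
   registers.  */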
/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
			)
   [va_arg registers]  (
			> to_allocate         <- FRAME_POINTER
   [frame]             (
			)
   [padding2]          /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_cpu;

/* Which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;    /* for -mcpu=<xxx> */
const char *ix86_arch_string;   /* for -march=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
                                        int, int, FILE *));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
                                                           rtx *, rtx *));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
                                          rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_free_machine_status PARAMS ((struct function *));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
                                         tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
                                            tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
                                              enum rtx_code *,
                                              enum rtx_code *,
                                              enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
                                           rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static int ix86_save_reg PARAMS ((int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
const struct attribute_spec ix86_attribute_table[];
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));

#ifdef DO_GLOBAL_CTORS_BODY
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif
#if defined(TARGET_ELF) && defined(TARGET_COFF)
static void sco_asm_named_section PARAMS ((const char *, unsigned int));
static void sco_asm_out_constructor PARAMS ((rtx, int));
#endif
/* Register class used for passing a given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class,
   except that gcc will use SF or DFmode moves instead of DImode to avoid
   reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
{
  X86_64_NO_CLASS,
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
  X86_64_SSE_CLASS,
  X86_64_SSESF_CLASS,
  X86_64_SSEDF_CLASS,
  X86_64_SSEUP_CLASS,
  X86_64_X87_CLASS,
  X86_64_X87UP_CLASS,
  X86_64_MEMORY_CLASS
};
const char * const x86_64_reg_class_name[] =
   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
                                      enum x86_64_reg_class [MAX_CLASSES],
                                      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
                                     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
                                        int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
                                                    enum x86_64_reg_class));
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
static void ix86_osf_output_function_prologue PARAMS ((FILE *,
                                                       HOST_WIDE_INT));
#  undef TARGET_ASM_FUNCTION_PROLOGUE
#  define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder

struct gcc_target targetm = TARGET_INITIALIZER;
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;  /* Processor costs */
      const int target_enable;             /* Target flags to enable.  */
      const int target_disable;            /* Target flags to disable.  */
      const int align_loop;                /* Default alignments.  */
      const int align_jump;
      const int align_func;
      const int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 2, 2, 2, 1},
      {&i486_cost, 0, 0, 4, 4, 4, 1},
      {&pentium_cost, 0, 0, -4, -4, -4, 1},
      {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
      {&k6_cost, 0, 0, -5, -5, 4, 1},
      {&athlon_cost, 0, 0, 4, -4, 4, 1},
      {&pentium4_cost, 0, 0, 2, 2, 2, 1}
    };

  static struct pta
    {
      const char *const name;              /* processor name or nickname.  */
      const enum processor_type processor;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386},
      {"i486", PROCESSOR_I486},
      {"i586", PROCESSOR_PENTIUM},
      {"pentium", PROCESSOR_PENTIUM},
      {"i686", PROCESSOR_PENTIUMPRO},
      {"pentiumpro", PROCESSOR_PENTIUMPRO},
      {"k6", PROCESSOR_K6},
      {"athlon", PROCESSOR_ATHLON},
      {"pentium4", PROCESSOR_PENTIUM4},
    };

  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
  ix86_arch = PROCESSOR_I386;
  ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("Code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("Code model `%s' not supported in the %s bit mode.",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("Code model `large' not supported yet.");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in.",
	   (target_flags & MASK_64BIT) ? 64 : 32);
  if (ix86_arch_string != 0)
    {
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
	  {
	    ix86_arch = processor_alias_table[i].processor;
	    /* Default cpu tuning to the architecture.  */
	    ix86_cpu = ix86_arch;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -march= switch", ix86_arch_string);
    }

  if (ix86_cpu_string != 0)
    {
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
	  {
	    ix86_cpu = processor_alias_table[i].processor;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
    }

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;
  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;
  free_machine_status = ix86_free_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else
    if (TARGET_64BIT)
      ix86_regparm = REGPARM_MAX;
  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }
  /* Default align_* from the processor table.  */
#define abs(n) (n < 0 ? -n : n)
  if (align_loops == 0)
    align_loops = 1 << abs (processor_target_table[ix86_cpu].align_loop);
  if (align_jumps == 0)
    align_jumps = 1 << abs (processor_target_table[ix86_cpu].align_jump);
  if (align_functions == 0)
    align_functions = 1 << abs (processor_target_table[ix86_cpu].align_func);

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128.  */
  ix86_preferred_stack_boundary = 128;
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 3 : 2) || i > 31)
	error ("-mpreferred-stack-boundary=%d is not between %d and 31", i,
	       TARGET_64BIT ? 3 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }
  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode.");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode.");
      /* Enable by default the SSE and MMX builtins.  */
      target_flags |= MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE;
    }

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    target_flags |= MASK_MMX;

  /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
	 extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
	target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & CPUMASK)
      && !(target_flags & MASK_NO_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
}
void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
  if (TARGET_64BIT && optimize >= 1)
    flag_omit_frame_pointer = 1;
  if (TARGET_64BIT)
    flag_pcc_struct_return = 0;
}
/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 1, 1, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 1, 1, false, false, false, ix86_handle_dll_attribute },
  { "shared", 1, 1, true, false, false, ix86_handle_shared_attribute },
#endif
  { NULL, 0, 0, false, false, false, NULL }
};
/* Handle a "cdecl" or "stdcall" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args ATTRIBUTE_UNUSED;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  if (TARGET_64BIT)
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle a "regparm" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning ("`%s' attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning ("argument to `%s' attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}
#if defined (OSF_OS) || defined (TARGET_OSF1ELF)

/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int: how many units of
   temporary storage to allocate.

   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.

   We override it here to allow for the new profiling code to go before
   the prologue and the old mcount code to go after the prologue (and
   after %ebx has been set up for ELF shared library support).  */

static void
ix86_osf_output_function_prologue (file, size)
     FILE *file;
     HOST_WIDE_INT size;
{
  char *prefix = "";
  char *lprefix = LPREFIX;
  int labelno = profile_label_no;

#ifdef OSF_OS

  if (TARGET_UNDERSCORES)
    prefix = "_";

  if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic && !HALF_PIC_P ())
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else if (HALF_PIC_P ())
	{
	  rtx symref;

	  HALF_PIC_EXTERNAL ("_mcount_ptr");
	  symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
						     "_mcount_ptr"));

	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
		   XSTR (symref, 0));
	  fprintf (file, "\tcall *(%%eax)\n");
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }

#else  /* !OSF_OS */

  if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic)
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }

#endif /* !OSF_OS */

  function_prologue (file, size);
}

#endif /* OSF_OS || TARGET_OSF1ELF */
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */
int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall functions will pop the stack if not variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    if (rtd
	&& (TYPE_ARG_TYPES (funtype) == NULL_TREE
	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		== void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument.  */
  if (aggregate_value_p (TREE_TYPE (funtype))
      && !TARGET_64BIT)
    return GET_MODE_SIZE (Pmode);

  return 0;
}
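
/* Worked example (a general x86 fact, not specific to this file): a
   function declared

     int __attribute__ ((stdcall)) f (int a, int b);

   has a fixed argument list, so the callee pops its own 8 bytes of
   arguments (a `ret $8' instruction) and this hook returns 8; the
   plain cdecl equivalent returns 0 and the caller pops instead.  */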
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */
bool
ix86_function_arg_regno_p (regno)
     int regno;
{
  int i;
  if (!TARGET_64BIT)
    return regno < REGPARM_MAX || (TARGET_SSE && SSE_REGNO_P (regno));
  if (SSE_REGNO_P (regno) && TARGET_SSE)
    return true;
  /* RAX is used as hidden argument to va_arg functions.  */
  if (!regno)
    return true;
  for (i = 0; i < REGPARM_MAX; i++)
    if (regno == x86_64_int_parameter_registers[i])
      return true;
  return false;
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;   /* Argument info to initialize */
     tree fntype;            /* tree ptr for function decl */
     rtx libname;            /* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  cum->sse_nregs = SSE_REGPARM_MAX;
  if (fntype && !TARGET_64BIT)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }
  cum->maybe_vaarg = false;

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers.  */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    {
	      if (!TARGET_64BIT)
		cum->nregs = 0;
	      cum->maybe_vaarg = true;
	    }
	}
    }
  if ((!fntype && !libname)
      || (fntype && !TYPE_ARG_TYPES (fntype)))
    cum->maybe_vaarg = 1;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
/* x86-64 register passing implementation.  See the x86-64 ABI for details.
   The goal of this code is to classify each eightbyte of the incoming
   argument by register class and assign registers accordingly.  */
/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (class1, class2)
     enum x86_64_reg_class class1, class2;
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
      || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
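
/* Example of the rules above (purely illustrative): merging
   X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS yields
   X86_64_INTEGERSI_CLASS (rule #4), while merging X86_64_SSE_CLASS
   with X86_64_X87_CLASS forces the value to memory (rule #5).  */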
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records; it gives the offset
   of the record member in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */
static int
classify_argument (mode, type, classes, bit_offset)
     enum machine_mode mode;
     tree type;
     enum x86_64_reg_class classes[MAX_CLASSES];
     int bit_offset;
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
      if (bytes > 16)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signal memory class, so handle it as a special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      if (TREE_CODE (type) == RECORD_TYPE)
	{
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = int_bit_position (field) / 8 / 8;
			   i < (int_bit_position (field)
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i] =
			  merge_classes (X86_64_INTEGER_CLASS,
					 classes[i]);
		    }
		  else
		    {
		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					       TREE_TYPE (field), subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      for (i = 0; i < num; i++)
			{
			  int pos =
			    (int_bit_position (field) + bit_offset) / 8 / 8;
			  classes[i + pos] =
			    merge_classes (subclasses[i], classes[i + pos]);
			}
		    }
		}
	    }
	}
      /* Arrays are handled as small records.  */
      else if (TREE_CODE (type) == ARRAY_TYPE)
	{
	  int num;
	  num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				   TREE_TYPE (type), subclasses, bit_offset);
	  if (!num)
	    return 0;

	  /* The partial classes are now full classes.  */
	  if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	    subclasses[0] = X86_64_SSE_CLASS;
	  if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
	    subclasses[0] = X86_64_INTEGER_CLASS;

	  for (i = 0; i < words; i++)
	    classes[i] = subclasses[i % num];
	}
      /* Unions are similar to RECORD_TYPE but offset is always 0.  */
      else if (TREE_CODE (type) == UNION_TYPE)
	{
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;
		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	}
      else
	abort ();

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
	    classes[i] = X86_64_SSE_CLASS;

	  /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
	    classes[i] = X86_64_SSE_CLASS;
	}
      return words;
    }
1589 /* Compute alignment needed. We align all types to natural boundaries with
1590 exception of XFmode that is aligned to 64bits. */
1591 if (mode != VOIDmode && mode != BLKmode)
1593 int mode_alignment = GET_MODE_BITSIZE (mode);
1595 if (mode == XFmode)
1596 mode_alignment = 128;
1597 else if (mode == XCmode)
1598 mode_alignment = 256;
1599 /* Missalignmed fields are always returned in memory. */
1600 if (bit_offset % mode_alignment)
1601 return 0;
1604 /* Classification of atomic types. */
1605 switch (mode)
1607 case DImode:
1608 case SImode:
1609 case HImode:
1610 case QImode:
1611 case CSImode:
1612 case CHImode:
1613 case CQImode:
1614 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1615 classes[0] = X86_64_INTEGERSI_CLASS;
1616 else
1617 classes[0] = X86_64_INTEGER_CLASS;
1618 return 1;
1619 case CDImode:
1620 case TImode:
1621 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1622 return 2;
1623 case CTImode:
1624 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1625 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1626 return 4;
1627 case SFmode:
1628 if (!(bit_offset % 64))
1629 classes[0] = X86_64_SSESF_CLASS;
1630 else
1631 classes[0] = X86_64_SSE_CLASS;
1632 return 1;
1633 case DFmode:
1634 classes[0] = X86_64_SSEDF_CLASS;
1635 return 1;
1636 case TFmode:
1637 classes[0] = X86_64_X87_CLASS;
1638 classes[1] = X86_64_X87UP_CLASS;
1639 return 2;
1640 case TCmode:
1641 classes[0] = X86_64_X87_CLASS;
1642 classes[1] = X86_64_X87UP_CLASS;
1643 classes[2] = X86_64_X87_CLASS;
1644 classes[3] = X86_64_X87UP_CLASS;
1645 return 4;
1646 case DCmode:
1647 classes[0] = X86_64_SSEDF_CLASS;
1648 classes[1] = X86_64_SSEDF_CLASS;
1649 return 2;
1650 case SCmode:
1651 classes[0] = X86_64_SSE_CLASS;
1652 return 1;
1653 case BLKmode:
1654 return 0;
1655 default:
1656 abort ();
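/* Editor's illustration (an added example, not part of the original source):
   for a type such as

       struct s { double d; long l; };   (16 bytes, two eightbytes)

   the classification above yields classes[0] = X86_64_SSEDF_CLASS for the
   first eightbyte and classes[1] = X86_64_INTEGER_CLASS for the second,
   so the struct travels split between an SSE and an integer register.  */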
1660 /* Examine the argument and set the number of registers required in each
1661 class. Return 0 if the parameter should be passed in memory. */
1662 static int
1663 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1664 enum machine_mode mode;
1665 tree type;
1666 int *int_nregs, *sse_nregs;
1667 int in_return;
1669 enum x86_64_reg_class class[MAX_CLASSES];
1670 int n = classify_argument (mode, type, class, 0);
1672 *int_nregs = 0;
1673 *sse_nregs = 0;
1674 if (!n)
1675 return 0;
1676 for (n--; n >= 0; n--)
1677 switch (class[n])
1679 case X86_64_INTEGER_CLASS:
1680 case X86_64_INTEGERSI_CLASS:
1681 (*int_nregs)++;
1682 break;
1683 case X86_64_SSE_CLASS:
1684 case X86_64_SSESF_CLASS:
1685 case X86_64_SSEDF_CLASS:
1686 (*sse_nregs)++;
1687 break;
1688 case X86_64_NO_CLASS:
1689 case X86_64_SSEUP_CLASS:
1690 break;
1691 case X86_64_X87_CLASS:
1692 case X86_64_X87UP_CLASS:
1693 if (!in_return)
1694 return 0;
1695 break;
1696 case X86_64_MEMORY_CLASS:
1697 abort ();
1699 return 1;
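/* Editor's example (illustrative only): for `struct s { long a; double b; }'
   the classes are X86_64_INTEGER_CLASS and X86_64_SSEDF_CLASS, so
   examine_argument sets *int_nregs = 1 and *sse_nregs = 1 and returns 1.
   A TFmode (80387) value classifies as X87/X87UP and is rejected here
   unless IN_RETURN is set, since x87 registers carry return values only.  */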
1701 /* Construct container for the argument used by GCC interface. See
1702 FUNCTION_ARG for the detailed description. */
1703 static rtx
1704 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1705 enum machine_mode mode;
1706 tree type;
1707 int in_return;
1708 int nintregs, nsseregs;
1709 int *intreg, sse_regno;
1711 enum machine_mode tmpmode;
1712 int bytes =
1713 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1714 enum x86_64_reg_class class[MAX_CLASSES];
1715 int n;
1716 int i;
1717 int nexps = 0;
1718 int needed_sseregs, needed_intregs;
1719 rtx exp[MAX_CLASSES];
1720 rtx ret;
1722 n = classify_argument (mode, type, class, 0);
1723 if (TARGET_DEBUG_ARG)
1725 if (!n)
1726 fprintf (stderr, "Memory class\n");
1727 else
1729 fprintf (stderr, "Classes:");
1730 for (i = 0; i < n; i++)
1732 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1734 fprintf (stderr, "\n");
1737 if (!n)
1738 return NULL;
1739 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1740 return NULL;
1741 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1742 return NULL;
1744 /* First construct the simple cases. Avoid SCmode, since we want to use
1745 a single register to pass this type. */
1746 if (n == 1 && mode != SCmode)
1747 switch (class[0])
1749 case X86_64_INTEGER_CLASS:
1750 case X86_64_INTEGERSI_CLASS:
1751 return gen_rtx_REG (mode, intreg[0]);
1752 case X86_64_SSE_CLASS:
1753 case X86_64_SSESF_CLASS:
1754 case X86_64_SSEDF_CLASS:
1755 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1756 case X86_64_X87_CLASS:
1757 return gen_rtx_REG (mode, FIRST_STACK_REG);
1758 case X86_64_NO_CLASS:
1759 /* Zero sized array, struct or class. */
1760 return NULL;
1761 default:
1762 abort ();
1764 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1765 return gen_rtx_REG (TImode, SSE_REGNO (sse_regno));
1766 if (n == 2
1767 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1768 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1769 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1770 && class[1] == X86_64_INTEGER_CLASS
1771 && (mode == CDImode || mode == TImode)
1772 && intreg[0] + 1 == intreg[1])
1773 return gen_rtx_REG (mode, intreg[0]);
1774 if (n == 4
1775 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1776 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1777 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1779 /* Otherwise figure out the entries of the PARALLEL. */
1780 for (i = 0; i < n; i++)
1782 switch (class[i])
1784 case X86_64_NO_CLASS:
1785 break;
1786 case X86_64_INTEGER_CLASS:
1787 case X86_64_INTEGERSI_CLASS:
1788 /* Merge TImodes on aligned occasions here too. */
1789 if (i * 8 + 8 > bytes)
1790 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1791 else if (class[i] == X86_64_INTEGERSI_CLASS)
1792 tmpmode = SImode;
1793 else
1794 tmpmode = DImode;
1795 /* We've requested a size (e.g. 24 bits) we have no integer mode for. Use DImode. */
1796 if (tmpmode == BLKmode)
1797 tmpmode = DImode;
1798 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1799 gen_rtx_REG (tmpmode, *intreg),
1800 GEN_INT (i*8));
1801 intreg++;
1802 break;
1803 case X86_64_SSESF_CLASS:
1804 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1805 gen_rtx_REG (SFmode,
1806 SSE_REGNO (sse_regno)),
1807 GEN_INT (i*8));
1808 sse_regno++;
1809 break;
1810 case X86_64_SSEDF_CLASS:
1811 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1812 gen_rtx_REG (DFmode,
1813 SSE_REGNO (sse_regno)),
1814 GEN_INT (i*8));
1815 sse_regno++;
1816 break;
1817 case X86_64_SSE_CLASS:
1818 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
1819 tmpmode = TImode, i++;
1820 else
1821 tmpmode = DImode;
1822 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1823 gen_rtx_REG (tmpmode,
1824 SSE_REGNO (sse_regno)),
1825 GEN_INT (i*8));
1826 sse_regno++;
1827 break;
1828 default:
1829 abort ();
1832 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
1833 for (i = 0; i < nexps; i++)
1834 XVECEXP (ret, 0, i) = exp [i];
1835 return ret;
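/* Editor's sketch of a resulting PARALLEL (illustrative, with symbolic
   register names): for the `struct s { long a; double b; }' example above,
   passed with the first integer register and sse_regno 0 free, the
   container looks roughly like

       (parallel:BLK [(expr_list (reg:DI di) (const_int 0))
                      (expr_list (reg:DF xmm0) (const_int 8))])

   where each EXPR_LIST pairs a hard register with the byte offset of the
   piece it carries.  */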
1838 /* Update the data in CUM to advance over an argument
1839 of mode MODE and data type TYPE.
1840 (TYPE is null for libcalls where that information may not be available.) */
1842 void
1843 function_arg_advance (cum, mode, type, named)
1844 CUMULATIVE_ARGS *cum; /* current arg information */
1845 enum machine_mode mode; /* current arg mode */
1846 tree type; /* type of the argument or 0 if lib support */
1847 int named; /* whether or not the argument was named */
1849 int bytes =
1850 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1851 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1853 if (TARGET_DEBUG_ARG)
1854 fprintf (stderr,
1855 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
1856 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1857 if (TARGET_64BIT)
1859 int int_nregs, sse_nregs;
1860 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
1861 cum->words += words;
1862 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
1864 cum->nregs -= int_nregs;
1865 cum->sse_nregs -= sse_nregs;
1866 cum->regno += int_nregs;
1867 cum->sse_regno += sse_nregs;
1869 else
1870 cum->words += words;
1872 else
1874 if (TARGET_SSE && mode == TImode)
1876 cum->sse_words += words;
1877 cum->sse_nregs -= 1;
1878 cum->sse_regno += 1;
1879 if (cum->sse_nregs <= 0)
1881 cum->sse_nregs = 0;
1882 cum->sse_regno = 0;
1885 else
1887 cum->words += words;
1888 cum->nregs -= words;
1889 cum->regno += words;
1891 if (cum->nregs <= 0)
1893 cum->nregs = 0;
1894 cum->regno = 0;
1898 return;
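/* Hypothetical trace (editor's note): on x86-64, advancing over a `double'
   argument decrements cum->sse_nregs and bumps cum->sse_regno; advancing
   over a 32-byte struct, which fails examine_argument, simply adds 4 to
   cum->words and leaves the register counts untouched.  */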
1901 /* Define where to put the arguments to a function.
1902 Value is zero to push the argument on the stack,
1903 or a hard register in which to store the argument.
1905 MODE is the argument's machine mode.
1906 TYPE is the data type of the argument (as a tree).
1907 This is null for libcalls where that information may
1908 not be available.
1909 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1910 the preceding args and about the function being called.
1911 NAMED is nonzero if this argument is a named parameter
1912 (otherwise it is an extra parameter matching an ellipsis). */
1914 struct rtx_def *
1915 function_arg (cum, mode, type, named)
1916 CUMULATIVE_ARGS *cum; /* current arg information */
1917 enum machine_mode mode; /* current arg mode */
1918 tree type; /* type of the argument or 0 if lib support */
1919 int named; /* != 0 for normal args, == 0 for ... args */
1921 rtx ret = NULL_RTX;
1922 int bytes =
1923 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1924 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1926 /* Handle a hidden AL argument containing the number of SSE registers used
1927 by varargs x86-64 functions. For the i386 ABI just return constm1_rtx to
1928 avoid any AL settings. */
1929 if (mode == VOIDmode)
1931 if (TARGET_64BIT)
1932 return GEN_INT (cum->maybe_vaarg
1933 ? (cum->sse_nregs < 0
1934 ? SSE_REGPARM_MAX
1935 : cum->sse_regno)
1936 : -1);
1937 else
1938 return constm1_rtx;
1940 if (TARGET_64BIT)
1941 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
1942 &x86_64_int_parameter_registers [cum->regno],
1943 cum->sse_regno);
1944 else
1945 switch (mode)
1947 /* For now, pass fp/complex values on the stack. */
1948 default:
1949 break;
1951 case BLKmode:
1952 case DImode:
1953 case SImode:
1954 case HImode:
1955 case QImode:
1956 if (words <= cum->nregs)
1957 ret = gen_rtx_REG (mode, cum->regno);
1958 break;
1959 case TImode:
1960 if (cum->sse_nregs)
1961 ret = gen_rtx_REG (mode, cum->sse_regno);
1962 break;
1965 if (TARGET_DEBUG_ARG)
1967 fprintf (stderr,
1968 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
1969 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1971 if (ret)
1972 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
1973 else
1974 fprintf (stderr, ", stack");
1976 fprintf (stderr, " )\n");
1979 return ret;
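/* Editor's note: the VOIDmode case above implements the x86-64 varargs
   convention that %al carries an upper bound on the number of SSE
   registers passed; e.g. a call such as printf ("%f", 1.0) is emitted
   with `movl $1, %eax' before the call instruction.  */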
1982 /* Gives the alignment boundary, in bits, of an argument with the specified mode
1983 and type. */
1985 int
1986 ix86_function_arg_boundary (mode, type)
1987 enum machine_mode mode;
1988 tree type;
1990 int align;
1991 if (!TARGET_64BIT)
1992 return PARM_BOUNDARY;
1993 if (type)
1994 align = TYPE_ALIGN (type);
1995 else
1996 align = GET_MODE_ALIGNMENT (mode);
1997 if (align < PARM_BOUNDARY)
1998 align = PARM_BOUNDARY;
1999 if (align > 128)
2000 align = 128;
2001 return align;
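/* Editor's examples (illustrative, 64-bit only since the 32-bit path
   returns PARM_BOUNDARY unconditionally): a TImode argument is given a
   128-bit boundary, a plain `int' is rounded up to PARM_BOUNDARY, and
   any larger request is capped at 128 bits by the code above.  */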
2004 /* Return true if N is a possible register number of function value. */
2005 bool
2006 ix86_function_value_regno_p (regno)
2007 int regno;
2009 if (!TARGET_64BIT)
2011 return ((regno) == 0
2012 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2013 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2015 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2016 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2017 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2020 /* Define how to find the value returned by a function.
2021 VALTYPE is the data type of the value (as a tree).
2022 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2023 otherwise, FUNC is 0. */
2024 rtx
2025 ix86_function_value (valtype)
2026 tree valtype;
2028 if (TARGET_64BIT)
2030 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2031 REGPARM_MAX, SSE_REGPARM_MAX,
2032 x86_64_int_return_registers, 0);
2033 /* For zero-sized structures, construct_container returns NULL, but we need
2034 to keep the rest of the compiler happy by returning a meaningful value. */
2035 if (!ret)
2036 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2037 return ret;
2039 else
2040 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2043 /* Return nonzero if TYPE is returned in memory. */
2044 int
2045 ix86_return_in_memory (type)
2046 tree type;
2048 int needed_intregs, needed_sseregs;
2049 if (TARGET_64BIT)
2051 return !examine_argument (TYPE_MODE (type), type, 1,
2052 &needed_intregs, &needed_sseregs);
2054 else
2056 if (TYPE_MODE (type) == BLKmode
2057 || (VECTOR_MODE_P (TYPE_MODE (type))
2058 && int_size_in_bytes (type) == 8)
2059 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2060 && TYPE_MODE (type) != TFmode
2061 && !VECTOR_MODE_P (TYPE_MODE (type))))
2062 return 1;
2063 return 0;
2067 /* Define how to find the value returned by a library function
2068 assuming the value has mode MODE. */
2069 rtx
2070 ix86_libcall_value (mode)
2071 enum machine_mode mode;
2073 if (TARGET_64BIT)
2075 switch (mode)
2077 case SFmode:
2078 case SCmode:
2079 case DFmode:
2080 case DCmode:
2081 return gen_rtx_REG (mode, FIRST_SSE_REG);
2082 case TFmode:
2083 case TCmode:
2084 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2085 default:
2086 return gen_rtx_REG (mode, 0);
2089 else
2090 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2093 /* Create the va_list data type. */
2095 tree
2096 ix86_build_va_list ()
2098 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2100 /* For i386 we use a plain pointer to the argument area. */
2101 if (!TARGET_64BIT)
2102 return build_pointer_type (char_type_node);
2104 record = make_lang_type (RECORD_TYPE);
2105 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2107 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2108 unsigned_type_node);
2109 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2110 unsigned_type_node);
2111 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2112 ptr_type_node);
2113 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2114 ptr_type_node);
2116 DECL_FIELD_CONTEXT (f_gpr) = record;
2117 DECL_FIELD_CONTEXT (f_fpr) = record;
2118 DECL_FIELD_CONTEXT (f_ovf) = record;
2119 DECL_FIELD_CONTEXT (f_sav) = record;
2121 TREE_CHAIN (record) = type_decl;
2122 TYPE_NAME (record) = type_decl;
2123 TYPE_FIELDS (record) = f_gpr;
2124 TREE_CHAIN (f_gpr) = f_fpr;
2125 TREE_CHAIN (f_fpr) = f_ovf;
2126 TREE_CHAIN (f_ovf) = f_sav;
2128 layout_type (record);
2130 /* The correct type is an array type of one element. */
2131 return build_array_type (record, build_index_type (size_zero_node));
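/* Editor's sketch: at the C level the record built above corresponds
   roughly to the x86-64 ABI va_list, i.e.

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag va_list[1];
*/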
2134 /* Perform any actions needed for a function that is receiving a
2135 variable number of arguments.
2137 CUM is as above.
2139 MODE and TYPE are the mode and type of the current parameter.
2141 PRETEND_SIZE is a variable that should be set to the amount of stack
2142 that must be pushed by the prologue to pretend that our caller pushed it.
2145 Normally, this macro will push all remaining incoming registers on the
2146 stack and set PRETEND_SIZE to the length of the registers pushed. */
2148 void
2149 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2150 CUMULATIVE_ARGS *cum;
2151 enum machine_mode mode;
2152 tree type;
2153 int *pretend_size ATTRIBUTE_UNUSED;
2154 int no_rtl;
2157 CUMULATIVE_ARGS next_cum;
2158 rtx save_area = NULL_RTX, mem;
2159 rtx label;
2160 rtx label_ref;
2161 rtx tmp_reg;
2162 rtx nsse_reg;
2163 int set;
2164 tree fntype;
2165 int stdarg_p;
2166 int i;
2168 if (!TARGET_64BIT)
2169 return;
2171 /* Indicate that we need to allocate space on the stack for the varargs save area. */
2172 ix86_save_varrargs_registers = 1;
2174 fntype = TREE_TYPE (current_function_decl);
2175 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2176 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2177 != void_type_node));
2179 /* For varargs, we do not want to skip the dummy va_dcl argument.
2180 For stdargs, we do want to skip the last named argument. */
2181 next_cum = *cum;
2182 if (stdarg_p)
2183 function_arg_advance (&next_cum, mode, type, 1);
2185 if (!no_rtl)
2186 save_area = frame_pointer_rtx;
2188 set = get_varargs_alias_set ();
2190 for (i = next_cum.regno; i < ix86_regparm; i++)
2192 mem = gen_rtx_MEM (Pmode,
2193 plus_constant (save_area, i * UNITS_PER_WORD));
2194 set_mem_alias_set (mem, set);
2195 emit_move_insn (mem, gen_rtx_REG (Pmode,
2196 x86_64_int_parameter_registers[i]));
2199 if (next_cum.sse_nregs)
2201 /* Now emit code to save SSE registers. The AX parameter contains the number
2202 of SSE parameter registers used to call this function. We use the
2203 sse_prologue_save insn template, which produces a computed jump across
2204 the SSE saves. We need some preparation work to get this working. */
2206 label = gen_label_rtx ();
2207 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2209 /* Compute the address to jump to:
2210 label - 4*eax + nnamed_sse_arguments*4 */
2211 tmp_reg = gen_reg_rtx (Pmode);
2212 nsse_reg = gen_reg_rtx (Pmode);
2213 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2214 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2215 gen_rtx_MULT (VOIDmode, nsse_reg,
2216 GEN_INT (4))));
2217 if (next_cum.sse_regno)
2218 emit_move_insn
2219 (nsse_reg,
2220 gen_rtx_CONST (DImode,
2221 gen_rtx_PLUS (DImode,
2222 label_ref,
2223 GEN_INT (next_cum.sse_regno * 4))));
2224 else
2225 emit_move_insn (nsse_reg, label_ref);
2226 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2228 /* Compute the address of the memory block we save into. We always use a
2229 pointer pointing 127 bytes after the first byte to store; this is needed
2230 to keep the instruction size limited to 4 bytes. */
2231 tmp_reg = gen_reg_rtx (Pmode);
2232 emit_insn (gen_rtx_SET(VOIDmode, tmp_reg,
2233 plus_constant (save_area, 8 * REGPARM_MAX + 127)));
2234 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2235 set_mem_alias_set (mem, set);
2237 /* And finally do the dirty job! */
2238 emit_insn (gen_sse_prologue_save (mem, nsse_reg, GEN_INT (next_cum.sse_regno),
2239 label));
2244 /* Implement va_start. */
2246 void
2247 ix86_va_start (stdarg_p, valist, nextarg)
2248 int stdarg_p;
2249 tree valist;
2250 rtx nextarg;
2252 HOST_WIDE_INT words, n_gpr, n_fpr;
2253 tree f_gpr, f_fpr, f_ovf, f_sav;
2254 tree gpr, fpr, ovf, sav, t;
2256 /* Only 64bit target needs something special. */
2257 if (!TARGET_64BIT)
2259 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2260 return;
2263 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2264 f_fpr = TREE_CHAIN (f_gpr);
2265 f_ovf = TREE_CHAIN (f_fpr);
2266 f_sav = TREE_CHAIN (f_ovf);
2268 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2269 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2270 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2271 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2272 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2274 /* Count number of gp and fp argument registers used. */
2275 words = current_function_args_info.words;
2276 n_gpr = current_function_args_info.regno;
2277 n_fpr = current_function_args_info.sse_regno;
2279 if (TARGET_DEBUG_ARG)
2280 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2281 (int)words, (int)n_gpr, (int)n_fpr);
2283 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2284 build_int_2 (n_gpr * 8, 0));
2285 TREE_SIDE_EFFECTS (t) = 1;
2286 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2288 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2289 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2290 TREE_SIDE_EFFECTS (t) = 1;
2291 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2293 /* Find the overflow area. */
2294 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2295 if (words != 0)
2296 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2297 build_int_2 (words * UNITS_PER_WORD, 0));
2298 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2299 TREE_SIDE_EFFECTS (t) = 1;
2300 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2302 /* Find the register save area.
2303 The function prologue saves it right above the stack frame. */
2304 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2305 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2306 TREE_SIDE_EFFECTS (t) = 1;
2307 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
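/* A hypothetical example of the initialization above (editor's note,
   assuming REGPARM_MAX == 6 on this target): for `void f (int a, ...)',
   one named integer argument has been consumed, so va_start leaves
   gp_offset = 1*8 = 8, fp_offset = 8*6 + 0*16 = 48, overflow_arg_area
   pointing at the first stack-passed argument, and reg_save_area at the
   block spilled by the prologue.  */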
2310 /* Implement va_arg. */
2311 rtx
2312 ix86_va_arg (valist, type)
2313 tree valist, type;
2315 static int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2316 tree f_gpr, f_fpr, f_ovf, f_sav;
2317 tree gpr, fpr, ovf, sav, t;
2318 int indirect_p = 0, size, rsize;
2319 rtx lab_false, lab_over = NULL_RTX;
2320 rtx addr_rtx, r;
2321 rtx container;
2323 /* Only 64bit target needs something special. */
2324 if (!TARGET_64BIT)
2326 return std_expand_builtin_va_arg (valist, type);
2329 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2330 f_fpr = TREE_CHAIN (f_gpr);
2331 f_ovf = TREE_CHAIN (f_fpr);
2332 f_sav = TREE_CHAIN (f_ovf);
2334 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2335 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2336 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2337 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2338 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2340 size = int_size_in_bytes (type);
2341 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2343 container = construct_container (TYPE_MODE (type), type, 0,
2344 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2346 /* Pull the value out of the saved registers ... */
2349 addr_rtx = gen_reg_rtx (Pmode);
2351 if (container)
2353 rtx int_addr_rtx, sse_addr_rtx;
2354 int needed_intregs, needed_sseregs;
2355 int need_temp;
2357 lab_over = gen_label_rtx ();
2358 lab_false = gen_label_rtx ();
2360 examine_argument (TYPE_MODE (type), type, 0,
2361 &needed_intregs, &needed_sseregs);
2364 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2365 || TYPE_ALIGN (type) > 128);
2367 /* In case we are passing a structure, verify that it is a consecutive block
2368 on the register save area. If not, we need to do moves. */
2369 if (!need_temp && !REG_P (container))
2371 /* Verify that all registers are strictly consecutive. */
2372 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2374 int i;
2376 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2378 rtx slot = XVECEXP (container, 0, i);
2379 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int)i
2380 || INTVAL (XEXP (slot, 1)) != i * 16)
2381 need_temp = 1;
2384 else
2386 int i;
2388 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2390 rtx slot = XVECEXP (container, 0, i);
2391 if (REGNO (XEXP (slot, 0)) != (unsigned int)i
2392 || INTVAL (XEXP (slot, 1)) != i * 8)
2393 need_temp = 1;
2397 if (!need_temp)
2399 int_addr_rtx = addr_rtx;
2400 sse_addr_rtx = addr_rtx;
2402 else
2404 int_addr_rtx = gen_reg_rtx (Pmode);
2405 sse_addr_rtx = gen_reg_rtx (Pmode);
2407 /* First ensure that we fit completely in registers. */
2408 if (needed_intregs)
2410 emit_cmp_and_jump_insns (expand_expr
2411 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2412 GEN_INT ((REGPARM_MAX - needed_intregs +
2413 1) * 8), GE, const1_rtx, SImode,
2414 1, 1, lab_false);
2416 if (needed_sseregs)
2418 emit_cmp_and_jump_insns (expand_expr
2419 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2420 GEN_INT ((SSE_REGPARM_MAX -
2421 needed_sseregs + 1) * 16 +
2422 REGPARM_MAX * 8), GE, const1_rtx,
2423 SImode, 1, 1, lab_false);
2426 /* Compute index to start of area used for integer regs. */
2427 if (needed_intregs)
2429 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2430 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2431 if (r != int_addr_rtx)
2432 emit_move_insn (int_addr_rtx, r);
2434 if (needed_sseregs)
2436 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2437 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2438 if (r != sse_addr_rtx)
2439 emit_move_insn (sse_addr_rtx, r);
2441 if (need_temp)
2443 int i;
2444 rtx mem;
2446 mem = assign_temp (type, 0, 1, 0);
2447 set_mem_alias_set (mem, get_varargs_alias_set ());
2448 addr_rtx = XEXP (mem, 0);
2449 for (i = 0; i < XVECLEN (container, 0); i++)
2451 rtx slot = XVECEXP (container, 0, i);
2452 rtx reg = XEXP (slot, 0);
2453 enum machine_mode mode = GET_MODE (reg);
2454 rtx src_addr;
2455 rtx src_mem;
2456 int src_offset;
2457 rtx dest_mem;
2459 if (SSE_REGNO_P (REGNO (reg)))
2461 src_addr = sse_addr_rtx;
2462 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2464 else
2466 src_addr = int_addr_rtx;
2467 src_offset = REGNO (reg) * 8;
2469 src_mem = gen_rtx_MEM (mode, src_addr);
2470 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2471 src_mem = adjust_address (src_mem, mode, src_offset);
2472 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2473 PUT_MODE (dest_mem, mode);
2474 /* ??? Break out TImode moves from integer registers? */
2475 emit_move_insn (dest_mem, src_mem);
2479 if (needed_intregs)
2482 t = build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2483 build_int_2 (needed_intregs * 8, 0));
2484 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2485 TREE_SIDE_EFFECTS (t) = 1;
2486 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2488 if (needed_sseregs)
2491 t = build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2492 build_int_2 (needed_sseregs * 16, 0));
2493 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2494 TREE_SIDE_EFFECTS (t) = 1;
2495 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2498 emit_jump_insn (gen_jump (lab_over));
2499 emit_barrier ();
2500 emit_label (lab_false);
2503 /* ... otherwise out of the overflow area. */
2505 /* Care for on-stack alignment if needed. */
2506 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2507 t = ovf;
2508 else
2510 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2511 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2512 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2514 t = save_expr (t);
2516 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2517 if (r != addr_rtx)
2518 emit_move_insn (addr_rtx, r);
2521 t = build (PLUS_EXPR, TREE_TYPE (t), t,
2522 build_int_2 (rsize * UNITS_PER_WORD, 0));
2523 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2524 TREE_SIDE_EFFECTS (t) = 1;
2525 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2527 if (container)
2528 emit_label (lab_over);
2530 if (indirect_p)
2532 abort ();
2533 r = gen_rtx_MEM (Pmode, addr_rtx);
2534 set_mem_alias_set (r, get_varargs_alias_set ());
2535 emit_move_insn (addr_rtx, r);
2538 return addr_rtx;
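/* Editor's sketch of the register save area addressed above (offsets follow
   from the code, assuming REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8):

       reg_save_area +   0 ... +  47   integer argument registers, 8 bytes each
       reg_save_area +  48 ... + 175   %xmm0 ... %xmm7, 16 bytes each

   gp_offset/fp_offset index into this block until the register quota is
   exhausted, after which overflow_arg_area takes over.  */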
2541 /* Return nonzero if OP is a general operand representable on x86_64. */
2543 int
2544 x86_64_general_operand (op, mode)
2545 rtx op;
2546 enum machine_mode mode;
2548 if (!TARGET_64BIT)
2549 return general_operand (op, mode);
2550 if (nonimmediate_operand (op, mode))
2551 return 1;
2552 return x86_64_sign_extended_value (op);
2555 /* Return nonzero if OP is a general operand representable on x86_64
2556 as either a sign-extended or a zero-extended constant. */
2558 int
2559 x86_64_szext_general_operand (op, mode)
2560 rtx op;
2561 enum machine_mode mode;
2563 if (!TARGET_64BIT)
2564 return general_operand (op, mode);
2565 if (nonimmediate_operand (op, mode))
2566 return 1;
2567 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2570 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2573 x86_64_nonmemory_operand (op, mode)
2574 rtx op;
2575 enum machine_mode mode;
2577 if (!TARGET_64BIT)
2578 return nonmemory_operand (op, mode);
2579 if (register_operand (op, mode))
2580 return 1;
2581 return x86_64_sign_extended_value (op);
2584 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2587 x86_64_movabs_operand (op, mode)
2588 rtx op;
2589 enum machine_mode mode;
2591 if (!TARGET_64BIT || !flag_pic)
2592 return nonmemory_operand (op, mode);
2593 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2594 return 1;
2595 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2596 return 1;
2597 return 0;
2600 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2603 x86_64_szext_nonmemory_operand (op, mode)
2604 rtx op;
2605 enum machine_mode mode;
2607 if (!TARGET_64BIT)
2608 return nonmemory_operand (op, mode);
2609 if (register_operand (op, mode))
2610 return 1;
2611 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2614 /* Return nonzero if OP is immediate operand representable on x86_64. */
2617 x86_64_immediate_operand (op, mode)
2618 rtx op;
2619 enum machine_mode mode;
2621 if (!TARGET_64BIT)
2622 return immediate_operand (op, mode);
2623 return x86_64_sign_extended_value (op);
2626 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero-extended constant. */
2629 x86_64_zext_immediate_operand (op, mode)
2630 rtx op;
2631 enum machine_mode mode ATTRIBUTE_UNUSED;
2633 return x86_64_zero_extended_value (op);
2636 /* Return nonzero if OP is (const_int 1), else return zero. */
2639 const_int_1_operand (op, mode)
2640 rtx op;
2641 enum machine_mode mode ATTRIBUTE_UNUSED;
2643 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2646 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2647 reference and a constant. */
2650 symbolic_operand (op, mode)
2651 register rtx op;
2652 enum machine_mode mode ATTRIBUTE_UNUSED;
2654 switch (GET_CODE (op))
2656 case SYMBOL_REF:
2657 case LABEL_REF:
2658 return 1;
2660 case CONST:
2661 op = XEXP (op, 0);
2662 if (GET_CODE (op) == SYMBOL_REF
2663 || GET_CODE (op) == LABEL_REF
2664 || (GET_CODE (op) == UNSPEC
2665 && (XINT (op, 1) == 6
2666 || XINT (op, 1) == 7
2667 || XINT (op, 1) == 15)))
2668 return 1;
2669 if (GET_CODE (op) != PLUS
2670 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2671 return 0;
2673 op = XEXP (op, 0);
2674 if (GET_CODE (op) == SYMBOL_REF
2675 || GET_CODE (op) == LABEL_REF)
2676 return 1;
2677 /* Only @GOTOFF gets offsets. */
2678 if (GET_CODE (op) != UNSPEC
2679 || XINT (op, 1) != 7)
2680 return 0;
2682 op = XVECEXP (op, 0, 0);
2683 if (GET_CODE (op) == SYMBOL_REF
2684 || GET_CODE (op) == LABEL_REF)
2685 return 1;
2686 return 0;
2688 default:
2689 return 0;
2693 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2696 pic_symbolic_operand (op, mode)
2697 register rtx op;
2698 enum machine_mode mode ATTRIBUTE_UNUSED;
2700 if (GET_CODE (op) != CONST)
2701 return 0;
2702 op = XEXP (op, 0);
2703 if (TARGET_64BIT)
2705 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2706 return 1;
2708 else
2710 if (GET_CODE (op) == UNSPEC)
2711 return 1;
2712 if (GET_CODE (op) != PLUS
2713 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2714 return 0;
2715 op = XEXP (op, 0);
2716 if (GET_CODE (op) == UNSPEC)
2717 return 1;
2719 return 0;
2722 /* Return true if OP is a symbolic operand that resolves locally. */
2724 static int
2725 local_symbolic_operand (op, mode)
2726 rtx op;
2727 enum machine_mode mode ATTRIBUTE_UNUSED;
2729 if (GET_CODE (op) == LABEL_REF)
2730 return 1;
2732 if (GET_CODE (op) == CONST
2733 && GET_CODE (XEXP (op, 0)) == PLUS
2734 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2735 op = XEXP (XEXP (op, 0), 0);
2737 if (GET_CODE (op) != SYMBOL_REF)
2738 return 0;
2740 /* These we've been told are local by varasm and encode_section_info
2741 respectively. */
2742 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2743 return 1;
2745 /* There is, however, a not insubstantial body of code in the rest of
2746 the compiler that assumes it can just stick the results of
2747 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2748 /* ??? This is a hack. Should update the body of the compiler to
2749 always create a DECL and invoke ENCODE_SECTION_INFO. */
2750 if (strncmp (XSTR (op, 0), internal_label_prefix,
2751 internal_label_prefix_len) == 0)
2752 return 1;
2754 return 0;
2757 /* Test for a valid operand for a call instruction. Don't allow the
2758 arg pointer register or virtual regs since they may decay into
2759 reg + const, which the patterns can't handle. */
2762 call_insn_operand (op, mode)
2763 rtx op;
2764 enum machine_mode mode ATTRIBUTE_UNUSED;
2766 /* Disallow indirect through a virtual register. This leads to
2767 compiler aborts when trying to eliminate them. */
2768 if (GET_CODE (op) == REG
2769 && (op == arg_pointer_rtx
2770 || op == frame_pointer_rtx
2771 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
2772 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
2773 return 0;
2775 /* Disallow `call 1234'. Due to varying assembler lameness this
2776 gets either rejected or translated to `call .+1234'. */
2777 if (GET_CODE (op) == CONST_INT)
2778 return 0;
2780 /* Explicitly allow SYMBOL_REF even if pic. */
2781 if (GET_CODE (op) == SYMBOL_REF)
2782 return 1;
2784 /* Half-pic doesn't allow anything but registers and constants.
2785 We've just taken care of the latter. */
2786 if (HALF_PIC_P ())
2787 return register_operand (op, Pmode);
2789 /* Otherwise we can allow any general_operand in the address. */
2790 return general_operand (op, Pmode);
2794 constant_call_address_operand (op, mode)
2795 rtx op;
2796 enum machine_mode mode ATTRIBUTE_UNUSED;
2798 if (GET_CODE (op) == CONST
2799 && GET_CODE (XEXP (op, 0)) == PLUS
2800 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2801 op = XEXP (XEXP (op, 0), 0);
2802 return GET_CODE (op) == SYMBOL_REF;
2805 /* Match exactly zero and one. */
2808 const0_operand (op, mode)
2809 register rtx op;
2810 enum machine_mode mode;
2812 return op == CONST0_RTX (mode);
2816 const1_operand (op, mode)
2817 register rtx op;
2818 enum machine_mode mode ATTRIBUTE_UNUSED;
2820 return op == const1_rtx;
2823 /* Match 2, 4, or 8. Used for leal multiplicands. */
2826 const248_operand (op, mode)
2827 register rtx op;
2828 enum machine_mode mode ATTRIBUTE_UNUSED;
2830 return (GET_CODE (op) == CONST_INT
2831 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
2834 /* True if this is a constant appropriate for an increment or decrement. */
2837 incdec_operand (op, mode)
2838 register rtx op;
2839 enum machine_mode mode ATTRIBUTE_UNUSED;
2841 /* On Pentium4, the inc and dec operations cause an extra dependency on the
2842 flags register, since the carry flag is not set. */
2843 if (TARGET_PENTIUM4 && !optimize_size)
2844 return 0;
2845 return op == const1_rtx || op == constm1_rtx;
2848 /* Return nonzero if OP is acceptable as operand of DImode shift
2849 expander. */
2852 shiftdi_operand (op, mode)
2853 rtx op;
2854 enum machine_mode mode ATTRIBUTE_UNUSED;
2856 if (TARGET_64BIT)
2857 return nonimmediate_operand (op, mode);
2858 else
2859 return register_operand (op, mode);
2862 /* Return false if this is the stack pointer, or any other fake
2863 register eliminable to the stack pointer. Otherwise, this is
2864 a register operand.
2866 This is used to prevent esp from being used as an index reg,
2867 which would only happen in pathological cases. */
2870 reg_no_sp_operand (op, mode)
2871 register rtx op;
2872 enum machine_mode mode;
2874 rtx t = op;
2875 if (GET_CODE (t) == SUBREG)
2876 t = SUBREG_REG (t);
2877 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
2878 return 0;
2880 return register_operand (op, mode);
2884 mmx_reg_operand (op, mode)
2885 register rtx op;
2886 enum machine_mode mode ATTRIBUTE_UNUSED;
2888 return MMX_REG_P (op);
2891 /* Return false if this is any eliminable register. Otherwise
2892 general_operand. */
2895 general_no_elim_operand (op, mode)
2896 register rtx op;
2897 enum machine_mode mode;
2899 rtx t = op;
2900 if (GET_CODE (t) == SUBREG)
2901 t = SUBREG_REG (t);
2902 if (t == arg_pointer_rtx || t == frame_pointer_rtx
2903 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
2904 || t == virtual_stack_dynamic_rtx)
2905 return 0;
2906 if (REG_P (t)
2907 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
2908 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
2909 return 0;
2911 return general_operand (op, mode);
2914 /* Return false if this is any eliminable register. Otherwise
2915 register_operand or const_int. */
2918 nonmemory_no_elim_operand (op, mode)
2919 register rtx op;
2920 enum machine_mode mode;
2922 rtx t = op;
2923 if (GET_CODE (t) == SUBREG)
2924 t = SUBREG_REG (t);
2925 if (t == arg_pointer_rtx || t == frame_pointer_rtx
2926 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
2927 || t == virtual_stack_dynamic_rtx)
2928 return 0;
2930 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
2933 /* Return true if op is a Q_REGS class register. */
2936 q_regs_operand (op, mode)
2937 register rtx op;
2938 enum machine_mode mode;
2940 if (mode != VOIDmode && GET_MODE (op) != mode)
2941 return 0;
2942 if (GET_CODE (op) == SUBREG)
2943 op = SUBREG_REG (op);
2944 return QI_REG_P (op);
2947 /* Return true if op is a NON_Q_REGS class register. */
2950 non_q_regs_operand (op, mode)
2951 register rtx op;
2952 enum machine_mode mode;
2954 if (mode != VOIDmode && GET_MODE (op) != mode)
2955 return 0;
2956 if (GET_CODE (op) == SUBREG)
2957 op = SUBREG_REG (op);
2958 return NON_QI_REG_P (op);
2961 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
2962 insns. */
2964 sse_comparison_operator (op, mode)
2965 rtx op;
2966 enum machine_mode mode ATTRIBUTE_UNUSED;
2968 enum rtx_code code = GET_CODE (op);
2969 switch (code)
2971 /* Operations supported directly. */
2972 case EQ:
2973 case LT:
2974 case LE:
2975 case UNORDERED:
2976 case NE:
2977 case UNGE:
2978 case UNGT:
2979 case ORDERED:
2980 return 1;
2981 /* These are equivalent to the ones above for non-IEEE comparisons. */
2982 case UNEQ:
2983 case UNLT:
2984 case UNLE:
2985 case LTGT:
2986 case GE:
2987 case GT:
2988 return !TARGET_IEEE_FP;
2989 default:
2990 return 0;
2993 /* Return 1 if OP is a valid comparison operator in valid mode. */
2995 ix86_comparison_operator (op, mode)
2996 register rtx op;
2997 enum machine_mode mode;
2999 enum machine_mode inmode;
3000 enum rtx_code code = GET_CODE (op);
3001 if (mode != VOIDmode && GET_MODE (op) != mode)
3002 return 0;
3003 if (GET_RTX_CLASS (code) != '<')
3004 return 0;
3005 inmode = GET_MODE (XEXP (op, 0));
3007 if (inmode == CCFPmode || inmode == CCFPUmode)
3009 enum rtx_code second_code, bypass_code;
3010 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3011 return (bypass_code == NIL && second_code == NIL);
3013 switch (code)
3015 case EQ: case NE:
3016 return 1;
3017 case LT: case GE:
3018 if (inmode == CCmode || inmode == CCGCmode
3019 || inmode == CCGOCmode || inmode == CCNOmode)
3020 return 1;
3021 return 0;
3022 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3023 if (inmode == CCmode)
3024 return 1;
3025 return 0;
3026 case GT: case LE:
3027 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3028 return 1;
3029 return 0;
3030 default:
3031 return 0;
3035 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3038 fcmov_comparison_operator (op, mode)
3039 register rtx op;
3040 enum machine_mode mode;
3042 enum machine_mode inmode;
3043 enum rtx_code code = GET_CODE (op);
3044 if (mode != VOIDmode && GET_MODE (op) != mode)
3045 return 0;
3046 if (GET_RTX_CLASS (code) != '<')
3047 return 0;
3048 inmode = GET_MODE (XEXP (op, 0));
3049 if (inmode == CCFPmode || inmode == CCFPUmode)
3051 enum rtx_code second_code, bypass_code;
3052 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3053 if (bypass_code != NIL || second_code != NIL)
3054 return 0;
3055 code = ix86_fp_compare_code_to_integer (code);
3057 /* The i387 supports just a limited set of condition codes. */
3058 switch (code)
3060 case LTU: case GTU: case LEU: case GEU:
3061 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3062 return 1;
3063 return 0;
3064 case ORDERED: case UNORDERED:
3065 case EQ: case NE:
3066 return 1;
3067 default:
3068 return 0;
3072 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3075 promotable_binary_operator (op, mode)
3076 register rtx op;
3077 enum machine_mode mode ATTRIBUTE_UNUSED;
3079 switch (GET_CODE (op))
3081 case MULT:
3082 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3083 but the 386 and 486 do HImode multiplies faster. */
3084 return ix86_cpu > PROCESSOR_I486;
3085 case PLUS:
3086 case AND:
3087 case IOR:
3088 case XOR:
3089 case ASHIFT:
3090 return 1;
3091 default:
3092 return 0;
3096 /* Nearly general operand, but accept any const_double, since we wish
3097 to be able to drop them into memory rather than have them get pulled
3098 into registers. */
3101 cmp_fp_expander_operand (op, mode)
3102 register rtx op;
3103 enum machine_mode mode;
3105 if (mode != VOIDmode && mode != GET_MODE (op))
3106 return 0;
3107 if (GET_CODE (op) == CONST_DOUBLE)
3108 return 1;
3109 return general_operand (op, mode);
3112 /* Match an SI or HImode register for a zero_extract. */
3115 ext_register_operand (op, mode)
3116 register rtx op;
3117 enum machine_mode mode ATTRIBUTE_UNUSED;
3119 int regno;
3120 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3121 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3122 return 0;
3124 if (!register_operand (op, VOIDmode))
3125 return 0;
3127 /* Be careful to accept only registers having upper parts. */
3128 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3129 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3132 /* Return 1 if this is a valid binary floating-point operation.
3133 OP is the expression matched, and MODE is its mode. */
3136 binary_fp_operator (op, mode)
3137 register rtx op;
3138 enum machine_mode mode;
3140 if (mode != VOIDmode && mode != GET_MODE (op))
3141 return 0;
3143 switch (GET_CODE (op))
3145 case PLUS:
3146 case MINUS:
3147 case MULT:
3148 case DIV:
3149 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3151 default:
3152 return 0;
3157 mult_operator(op, mode)
3158 register rtx op;
3159 enum machine_mode mode ATTRIBUTE_UNUSED;
3161 return GET_CODE (op) == MULT;
3165 div_operator(op, mode)
3166 register rtx op;
3167 enum machine_mode mode ATTRIBUTE_UNUSED;
3169 return GET_CODE (op) == DIV;
3173 arith_or_logical_operator (op, mode)
3174 rtx op;
3175 enum machine_mode mode;
3177 return ((mode == VOIDmode || GET_MODE (op) == mode)
3178 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3179 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3182 /* Returns 1 if OP is memory operand with a displacement. */
3185 memory_displacement_operand (op, mode)
3186 register rtx op;
3187 enum machine_mode mode;
3189 struct ix86_address parts;
3191 if (! memory_operand (op, mode))
3192 return 0;
3194 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3195 abort ();
3197 return parts.disp != NULL_RTX;
3200 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3201 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3203 ??? It seems likely that this will only work because cmpsi is an
3204 expander, and no actual insns use this. */
3207 cmpsi_operand (op, mode)
3208 rtx op;
3209 enum machine_mode mode;
3211 if (nonimmediate_operand (op, mode))
3212 return 1;
3214 if (GET_CODE (op) == AND
3215 && GET_MODE (op) == SImode
3216 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3217 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3218 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3219 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3220 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3221 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3222 return 1;
3224 return 0;
3227 /* Returns 1 if OP is a memory operand that cannot be represented by the
3228 modRM array. */
3231 long_memory_operand (op, mode)
3232 register rtx op;
3233 enum machine_mode mode;
3235 if (! memory_operand (op, mode))
3236 return 0;
3238 return memory_address_length (op) != 0;
3241 /* Return nonzero if the rtx is known aligned. */
3244 aligned_operand (op, mode)
3245 rtx op;
3246 enum machine_mode mode;
3248 struct ix86_address parts;
3250 if (!general_operand (op, mode))
3251 return 0;
3253 /* Registers and immediate operands are always "aligned". */
3254 if (GET_CODE (op) != MEM)
3255 return 1;
3257 /* Don't even try to do any aligned optimizations with volatiles. */
3258 if (MEM_VOLATILE_P (op))
3259 return 0;
3261 op = XEXP (op, 0);
3263 /* Pushes and pops are only valid on the stack pointer. */
3264 if (GET_CODE (op) == PRE_DEC
3265 || GET_CODE (op) == POST_INC)
3266 return 1;
3268 /* Decode the address. */
3269 if (! ix86_decompose_address (op, &parts))
3270 abort ();
3272 /* Look for some component that isn't known to be aligned. */
3273 if (parts.index)
3275 if (parts.scale < 4
3276 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3277 return 0;
3279 if (parts.base)
3281 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3282 return 0;
3284 if (parts.disp)
3286 if (GET_CODE (parts.disp) != CONST_INT
3287 || (INTVAL (parts.disp) & 3) != 0)
3288 return 0;
3291 /* Didn't find one -- this must be an aligned address. */
3292 return 1;
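/* Editor's illustration: (mem:SI (plus:SI (reg:SI bp) (const_int 8)))
   is accepted when the base register is known to be 32-bit aligned,
   while a displacement of, say, 6 fails the (disp & 3) test above.  */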
3295 /* Return true if the constant is something that can be loaded with
3296 a special instruction. Only handle 0.0 and 1.0; others are less
3297 worthwhile. */
3300 standard_80387_constant_p (x)
3301 rtx x;
3303 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3304 return -1;
3305 /* Note that on the 80387 there are other constants, such as pi, that we
3306 should support too. On some machines, these are much slower to load as a
3307 standard constant than to load from doubles in memory. */
3308 if (x == CONST0_RTX (GET_MODE (x)))
3309 return 1;
3310 if (x == CONST1_RTX (GET_MODE (x)))
3311 return 2;
3312 return 0;
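/* Editor's note: the return values above map onto dedicated i387 load
   instructions -- 1 selects fldz (load +0.0) and 2 selects fld1 (load
   +1.0), so the move patterns can emit these instead of a memory load;
   -1/0 mean "not a special constant".  */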
3315 /* Return 1 if X is an FP constant we can load into an SSE register
3316 without using memory. */
3317 int
3318 standard_sse_constant_p (x)
3319 rtx x;
3321 if (GET_CODE (x) != CONST_DOUBLE)
3322 return -1;
3323 return (x == CONST0_RTX (GET_MODE (x)));
3326 /* Returns 1 if OP contains a symbol reference */
3329 symbolic_reference_mentioned_p (op)
3330 rtx op;
3332 register const char *fmt;
3333 register int i;
3335 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3336 return 1;
3338 fmt = GET_RTX_FORMAT (GET_CODE (op));
3339 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3341 if (fmt[i] == 'E')
3343 register int j;
3345 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3346 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3347 return 1;
3350 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3351 return 1;
3354 return 0;
3357 /* Return 1 if it is appropriate to emit `ret' instructions in the
3358 body of a function. Do this only if the epilogue is simple, needing a
3359 couple of insns. Prior to reloading, we can't tell how many registers
3360 must be saved, so return 0 then. Return 0 if there is no frame
3361 marker to de-allocate.
3363 If NON_SAVING_SETJMP is defined and true, then it is not possible
3364 for the epilogue to be simple, so return 0. This is a special case
3365 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3366 until final, but jump_optimize may need to know sooner if a
3367 `return' is OK. */
3370 ix86_can_use_return_insn_p ()
3372 struct ix86_frame frame;
3374 #ifdef NON_SAVING_SETJMP
3375 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3376 return 0;
3377 #endif
3378 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
3379 if (profile_block_flag == 2)
3380 return 0;
3381 #endif
3383 if (! reload_completed || frame_pointer_needed)
3384 return 0;
3386 /* Don't allow more than 32K of pop, since that's all we can do
3387 with one instruction. */
3388 if (current_function_pops_args
3389 && current_function_args_size >= 32768)
3390 return 0;
3392 ix86_compute_frame_layout (&frame);
3393 return frame.to_allocate == 0 && frame.nregs == 0;
3396 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3397 int
3398 x86_64_sign_extended_value (value)
3399 rtx value;
3401 switch (GET_CODE (value))
3403 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3404 to be at least 32, and thus all acceptable constants are
3405 represented as CONST_INT. */
3406 case CONST_INT:
3407 if (HOST_BITS_PER_WIDE_INT == 32)
3408 return 1;
3409 else
3411 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3412 return trunc_int_for_mode (val, SImode) == val;
3414 break;
3416 /* For certain code models, the symbolic references are known to fit. */
3417 case SYMBOL_REF:
3418 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3420 /* For certain code models, the code is near as well. */
3421 case LABEL_REF:
3422 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3424 /* We also may accept the offsetted memory references in certain special
3425 cases. */
3426 case CONST:
3427 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3428 && XVECLEN (XEXP (value, 0), 0) == 1
3429 && XINT (XEXP (value, 0), 1) == 15)
3430 return 1;
3431 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3433 rtx op1 = XEXP (XEXP (value, 0), 0);
3434 rtx op2 = XEXP (XEXP (value, 0), 1);
3435 HOST_WIDE_INT offset;
3437 if (ix86_cmodel == CM_LARGE)
3438 return 0;
3439 if (GET_CODE (op2) != CONST_INT)
3440 return 0;
3441 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3442 switch (GET_CODE (op1))
3444 case SYMBOL_REF:
3445 /* For CM_SMALL assume that the latest object is 1MB before the end of
3446 the 31-bit boundary. We may also accept pretty large negative
3447 constants, knowing that all objects are in the positive half of the
3448 address space. */
3449 if (ix86_cmodel == CM_SMALL
3450 && offset < 1024*1024*1024
3451 && trunc_int_for_mode (offset, SImode) == offset)
3452 return 1;
3453 /* For CM_KERNEL we know that all objects reside in the negative half
3454 of the 32-bit address space. We may not accept negative offsets,
3455 since they may be just off, and we may accept pretty large
3456 positive ones. */
3457 if (ix86_cmodel == CM_KERNEL
3458 && offset > 0
3459 && trunc_int_for_mode (offset, SImode) == offset)
3460 return 1;
3461 break;
3462 case LABEL_REF:
3463 /* These conditions are similar to SYMBOL_REF ones, just the
3464 constraints for code models differ. */
3465 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3466 && offset < 1024*1024*1024
3467 && trunc_int_for_mode (offset, SImode) == offset)
3468 return 1;
3469 if (ix86_cmodel == CM_KERNEL
3470 && offset > 0
3471 && trunc_int_for_mode (offset, SImode) == offset)
3472 return 1;
3473 break;
3474 default:
3475 return 0;
3478 return 0;
3479 default:
3480 return 0;
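/* Worked examples (editor's illustration): (const_int 0x7fffffff) is
   accepted, while (const_int 0x80000000) is not, because the latter
   would sign-extend to 0xffffffff80000000 rather than the intended
   value.  Symbols are accepted only when the code model guarantees
   they live in the low 2GB of the address space.  */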
3484 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3485 int
3486 x86_64_zero_extended_value (value)
3487 rtx value;
3489 switch (GET_CODE (value))
3491 case CONST_DOUBLE:
3492 if (HOST_BITS_PER_WIDE_INT == 32)
3493 return (GET_MODE (value) == VOIDmode
3494 && !CONST_DOUBLE_HIGH (value));
3495 else
3496 return 0;
3497 case CONST_INT:
3498 if (HOST_BITS_PER_WIDE_INT == 32)
3499 return INTVAL (value) >= 0;
3500 else
3501 return !(INTVAL (value) & ~(HOST_WIDE_INT)0xffffffff);
3502 break;
3504 /* For certain code models, the symbolic references are known to fit. */
3505 case SYMBOL_REF:
3506 return ix86_cmodel == CM_SMALL;
3508 /* For certain code models, the code is near as well. */
3509 case LABEL_REF:
3510 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3512 /* We also may accept the offsetted memory references in certain special
3513 cases. */
3514 case CONST:
3515 if (GET_CODE (XEXP (value, 0)) == PLUS)
3517 rtx op1 = XEXP (XEXP (value, 0), 0);
3518 rtx op2 = XEXP (XEXP (value, 0), 1);
3520 if (ix86_cmodel == CM_LARGE)
3521 return 0;
3522 switch (GET_CODE (op1))
3524 case SYMBOL_REF:
3525 return 0;
3526 /* For the small code model we may accept pretty large positive
3527 offsets, since one bit is available for free. Negative
3528 offsets are limited by the size of the NULL pointer area
3529 specified by the ABI. */
3530 if (ix86_cmodel == CM_SMALL
3531 && GET_CODE (op2) == CONST_INT
3532 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3533 && (trunc_int_for_mode (INTVAL (op2), SImode)
3534 == INTVAL (op2)))
3535 return 1;
3536 /* ??? For the kernel, we may accept adjustment of
3537 -0x10000000, since we know that it will just convert
3538 negative address space to positive, but perhaps this
3539 is not worthwhile. */
3540 break;
3541 case LABEL_REF:
3542 /* These conditions are similar to SYMBOL_REF ones, just the
3543 constraints for code models differ. */
3544 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3545 && GET_CODE (op2) == CONST_INT
3546 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3547 && (trunc_int_for_mode (INTVAL (op2), SImode)
3548 == INTVAL (op2)))
3549 return 1;
3550 break;
3551 default:
3552 return 0;
3555 return 0;
3556 default:
3557 return 0;
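/* Conversely (editor's illustration): (const_int 0xffffffff) fits the
   zero-extended field, but (const_int -1) does not, since its upper
   32 bits are set.  */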
3561 /* Value should be nonzero if functions must have frame pointers.
3562 Zero means the frame pointer need not be set up (and parms may
3563 be accessed via the stack pointer) in functions that seem suitable. */
3566 ix86_frame_pointer_required ()
3568 /* If we accessed previous frames, then the generated code expects
3569 to be able to access the saved ebp value in our frame. */
3570 if (cfun->machine->accesses_prev_frame)
3571 return 1;
3573 /* Several x86 os'es need a frame pointer for other reasons,
3574 usually pertaining to setjmp. */
3575 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3576 return 1;
3578 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3579 the frame pointer by default. Turn it back on now if we've not
3580 got a leaf function. */
3581 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3582 return 1;
3584 return 0;
3587 /* Record that the current function accesses previous call frames. */
3589 void
3590 ix86_setup_frame_addresses ()
3592 cfun->machine->accesses_prev_frame = 1;
3595 static char pic_label_name[32];
3597 /* This function generates code for -fpic that loads %ebx with
3598 the return address of the caller and then returns. */
3600 void
3601 ix86_asm_file_end (file)
3602 FILE *file;
3604 rtx xops[2];
3606 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
3607 return;
3609 /* ??? Binutils 2.10 and earlier have a linkonce elimination bug related
3610 to updating relocations to a section being discarded, such that this
3611 doesn't work. Ought to detect this at configure time. */
3612 #if 0
3613 /* The trick here is to create a linkonce section containing the
3614 pic label thunk, but to refer to it with an internal label.
3615 Because the label is internal, we don't have inter-dso name
3616 binding issues on hosts that don't support ".hidden".
3618 In order to use these macros, however, we must create a fake
3619 function decl. */
3620 if (targetm.have_named_sections)
3622 tree decl = build_decl (FUNCTION_DECL,
3623 get_identifier ("i686.get_pc_thunk"),
3624 error_mark_node);
3625 DECL_ONE_ONLY (decl) = 1;
3626 UNIQUE_SECTION (decl, 0);
3627 named_section (decl, NULL);
3629 else
3630 #else
3631 text_section ();
3632 #endif
3634 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3635 internal (non-global) label that's being emitted, it didn't make
3636 sense to have .type information for local labels. This caused
3637 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3638 me debug info for a label that you're declaring non-global?), so this
3639 was changed to call ASM_OUTPUT_LABEL() instead. */
3641 ASM_OUTPUT_LABEL (file, pic_label_name);
3643 xops[0] = pic_offset_table_rtx;
3644 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3645 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3646 output_asm_insn ("ret", xops);
3649 void
3650 load_pic_register ()
3652 rtx gotsym, pclab;
3654 if (TARGET_64BIT)
3655 abort();
3657 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3659 if (TARGET_DEEP_BRANCH_PREDICTION)
3661 if (! pic_label_name[0])
3662 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
3663 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
3665 else
3667 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
3670 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
3672 if (! TARGET_DEEP_BRANCH_PREDICTION)
3673 emit_insn (gen_popsi1 (pic_offset_table_rtx));
3675 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
3678 /* Generate an "push" pattern for input ARG. */
3680 static rtx
3681 gen_push (arg)
3682 rtx arg;
3684 return gen_rtx_SET (VOIDmode,
3685 gen_rtx_MEM (Pmode,
3686 gen_rtx_PRE_DEC (Pmode,
3687 stack_pointer_rtx)),
3688 arg);
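/* Editor's sketch: with Pmode == SImode and ARG = (reg:SI ax), the
   pattern built above reads

       (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI ax))

   i.e. an ordinary `pushl %eax' once recognized.  */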
3691 /* Return 1 if we need to save REGNO. */
3692 static int
3693 ix86_save_reg (regno, maybe_eh_return)
3694 int regno;
3695 int maybe_eh_return;
3697 if (flag_pic
3698 && ! TARGET_64BIT
3699 && regno == PIC_OFFSET_TABLE_REGNUM
3700 && (current_function_uses_pic_offset_table
3701 || current_function_uses_const_pool
3702 || current_function_calls_eh_return))
3703 return 1;
3705 if (current_function_calls_eh_return && maybe_eh_return)
3707 unsigned i;
3708 for (i = 0; ; i++)
3710 unsigned test = EH_RETURN_DATA_REGNO(i);
3711 if (test == INVALID_REGNUM)
3712 break;
3713 if (test == (unsigned) regno)
3714 return 1;
3718 return (regs_ever_live[regno]
3719 && !call_used_regs[regno]
3720 && !fixed_regs[regno]
3721 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3724 /* Return number of registers to be saved on the stack. */
3726 static int
3727 ix86_nsaved_regs ()
3729 int nregs = 0;
3730 int regno;
3732 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3733 if (ix86_save_reg (regno, true))
3734 nregs++;
3735 return nregs;
3738 /* Return the offset between two registers, one to be eliminated, and the other
3739 its replacement, at the start of a routine. */
3741 HOST_WIDE_INT
3742 ix86_initial_elimination_offset (from, to)
3743 int from;
3744 int to;
3746 struct ix86_frame frame;
3747 ix86_compute_frame_layout (&frame);
3749 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3750 return frame.hard_frame_pointer_offset;
3751 else if (from == FRAME_POINTER_REGNUM
3752 && to == HARD_FRAME_POINTER_REGNUM)
3753 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3754 else
3756 if (to != STACK_POINTER_REGNUM)
3757 abort ();
3758 else if (from == ARG_POINTER_REGNUM)
3759 return frame.stack_pointer_offset;
3760 else if (from != FRAME_POINTER_REGNUM)
3761 abort ();
3762 else
3763 return frame.stack_pointer_offset - frame.frame_pointer_offset;
/* Fill the ix86_frame structure with information about the frame of the function being compiled.  */
3769 static void
3770 ix86_compute_frame_layout (frame)
3771 struct ix86_frame *frame;
3773 HOST_WIDE_INT total_size;
3774 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3775 int offset;
3776 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
3777 HOST_WIDE_INT size = get_frame_size ();
3779 frame->nregs = ix86_nsaved_regs ();
3780 total_size = size;
  /* Skip the return address and the saved base pointer.  */
3783 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
3785 frame->hard_frame_pointer_offset = offset;
  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using these
     features, and they may break easily.  */
3791 if (size && !stack_alignment_needed)
3792 abort ();
3793 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
3794 abort ();
3795 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3796 abort ();
3797 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3798 abort ();
3800 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
3801 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
3803 /* Register save area */
3804 offset += frame->nregs * UNITS_PER_WORD;
3806 /* Va-arg area */
3807 if (ix86_save_varrargs_registers)
3809 offset += X86_64_VARARGS_SIZE;
3810 frame->va_arg_size = X86_64_VARARGS_SIZE;
3812 else
3813 frame->va_arg_size = 0;
3815 /* Align start of frame for local function. */
3816 frame->padding1 = ((offset + stack_alignment_needed - 1)
3817 & -stack_alignment_needed) - offset;
3819 offset += frame->padding1;
3821 /* Frame pointer points here. */
3822 frame->frame_pointer_offset = offset;
3824 offset += size;
3826 /* Add outgoing arguments area. */
3827 if (ACCUMULATE_OUTGOING_ARGS)
3829 offset += current_function_outgoing_args_size;
3830 frame->outgoing_arguments_size = current_function_outgoing_args_size;
3832 else
3833 frame->outgoing_arguments_size = 0;
3835 /* Align stack boundary. */
3836 frame->padding2 = ((offset + preferred_alignment - 1)
3837 & -preferred_alignment) - offset;
3839 offset += frame->padding2;
  /* We've reached the end of the stack frame.  */
3842 frame->stack_pointer_offset = offset;
  /* The size the prologue needs to allocate.  */
3845 frame->to_allocate =
3846 (size + frame->padding1 + frame->padding2
3847 + frame->outgoing_arguments_size + frame->va_arg_size);
3849 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
3850 && current_function_is_leaf)
3852 frame->red_zone_size = frame->to_allocate;
3853 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
3854 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
3856 else
3857 frame->red_zone_size = 0;
3858 frame->to_allocate -= frame->red_zone_size;
3859 frame->stack_pointer_offset -= frame->red_zone_size;
3860 #if 0
3861 fprintf (stderr, "nregs: %i\n", frame->nregs);
3862 fprintf (stderr, "size: %i\n", size);
3863 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
3864 fprintf (stderr, "padding1: %i\n", frame->padding1);
3865 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
3866 fprintf (stderr, "padding2: %i\n", frame->padding2);
3867 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
3868 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
3869 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
3870 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
3871 frame->hard_frame_pointer_offset);
3872 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
3873 #endif
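/* A rough sketch (not authoritative) of the 32-bit frame layout the
   code above computes, with a frame pointer in use; offsets grow from
   the incoming stack pointer downward:

     return address          <- entry %esp
     saved %ebp              <- hard_frame_pointer_offset ends here
     register save area         (frame->nregs words)
     va_arg save area           (frame->va_arg_size)
     padding1                   (aligns the local frame)
     local variables         <- frame_pointer_offset points here
     outgoing argument area     (when ACCUMULATE_OUTGOING_ARGS)
     padding2                   (aligns to preferred_alignment)
                             <- stack_pointer_offset

   In 64-bit leaf functions, the part of to_allocate that fits in the
   red zone is not actually allocated.  */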
3876 /* Emit code to save registers in the prologue. */
3878 static void
3879 ix86_emit_save_regs ()
3881 register int regno;
3882 rtx insn;
3884 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3885 if (ix86_save_reg (regno, true))
3887 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
3888 RTX_FRAME_RELATED_P (insn) = 1;
/* Emit code to save registers using MOV insns.  The first register
   is saved at POINTER + OFFSET.  */
3894 static void
3895 ix86_emit_save_regs_using_mov (pointer, offset)
3896 rtx pointer;
3897 HOST_WIDE_INT offset;
3899 int regno;
3900 rtx insn;
3902 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3903 if (ix86_save_reg (regno, true))
3905 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
3906 Pmode, offset),
3907 gen_rtx_REG (Pmode, regno));
3908 RTX_FRAME_RELATED_P (insn) = 1;
3909 offset += UNITS_PER_WORD;
3913 /* Expand the prologue into a bunch of separate insns. */
3915 void
3916 ix86_expand_prologue ()
3918 rtx insn;
3919 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
3920 || current_function_uses_const_pool)
3921 && !TARGET_64BIT);
3922 struct ix86_frame frame;
3923 int use_mov = 0;
3924 HOST_WIDE_INT allocate;
3926 if (!optimize_size)
3928 use_fast_prologue_epilogue
3929 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
3930 if (TARGET_PROLOGUE_USING_MOVE)
3931 use_mov = use_fast_prologue_epilogue;
3933 ix86_compute_frame_layout (&frame);
3935 /* Note: AT&T enter does NOT have reversed args. Enter is probably
3936 slower on all targets. Also sdb doesn't like it. */
3938 if (frame_pointer_needed)
3940 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
3941 RTX_FRAME_RELATED_P (insn) = 1;
3943 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3944 RTX_FRAME_RELATED_P (insn) = 1;
3947 allocate = frame.to_allocate;
  /* When we deal with only a single register and an empty frame,
     a push is equivalent to the mov+add sequence.  */
3950 if (allocate == 0 && frame.nregs <= 1)
3951 use_mov = 0;
3953 if (!use_mov)
3954 ix86_emit_save_regs ();
3955 else
3956 allocate += frame.nregs * UNITS_PER_WORD;
3958 if (allocate == 0)
3960 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
3962 insn = emit_insn (gen_pro_epilogue_adjust_stack
3963 (stack_pointer_rtx, stack_pointer_rtx,
3964 GEN_INT (-allocate)));
3965 RTX_FRAME_RELATED_P (insn) = 1;
3967 else
3969 /* ??? Is this only valid for Win32? */
3971 rtx arg0, sym;
      if (TARGET_64BIT)
        abort ();
3976 arg0 = gen_rtx_REG (SImode, 0);
3977 emit_move_insn (arg0, GEN_INT (allocate));
3979 sym = gen_rtx_MEM (FUNCTION_MODE,
3980 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
3981 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
3983 CALL_INSN_FUNCTION_USAGE (insn)
3984 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
3985 CALL_INSN_FUNCTION_USAGE (insn));
3987 if (use_mov)
3989 if (!frame_pointer_needed || !frame.to_allocate)
3990 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
3991 else
3992 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
3993 -frame.nregs * UNITS_PER_WORD);
3996 #ifdef SUBTARGET_PROLOGUE
3997 SUBTARGET_PROLOGUE;
3998 #endif
4000 if (pic_reg_used)
4001 load_pic_register ();
4003 /* If we are profiling, make sure no instructions are scheduled before
4004 the call to mcount. However, if -fpic, the above call will have
4005 done that. */
4006 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
4007 emit_insn (gen_blockage ());
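/* A hedged sketch of the common 32-bit expansion above (the exact
   instructions depend on the use_mov and TARGET_STACK_PROBE paths):

     pushl %ebp              ; only if frame_pointer_needed
     movl  %esp, %ebp
     pushl %ebx              ; saved registers via pushes, or ...
     subl  $NN, %esp         ; ... one larger sub followed by movs
     call  _alloca           ; replaces the sub when stack probing

   with NN standing for the computed allocation size.  */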
4010 /* Emit code to restore saved registers using MOV insns. First register
4011 is restored from POINTER + OFFSET. */
4012 static void
4013 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4014 rtx pointer;
4015 int offset;
4016 int maybe_eh_return;
4018 int regno;
4020 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4021 if (ix86_save_reg (regno, maybe_eh_return))
4023 emit_move_insn (gen_rtx_REG (Pmode, regno),
4024 adjust_address (gen_rtx_MEM (Pmode, pointer),
4025 Pmode, offset));
4026 offset += UNITS_PER_WORD;
4030 /* Restore function stack, frame, and registers. */
4032 void
4033 ix86_expand_epilogue (style)
4034 int style;
4036 int regno;
4037 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4038 struct ix86_frame frame;
4039 HOST_WIDE_INT offset;
4041 ix86_compute_frame_layout (&frame);
4043 /* Calculate start of saved registers relative to ebp. Special care
4044 must be taken for the normal return case of a function using
4045 eh_return: the eax and edx registers are marked as saved, but not
4046 restored along this path. */
4047 offset = frame.nregs;
4048 if (current_function_calls_eh_return && style != 2)
4049 offset -= 2;
4050 offset *= -UNITS_PER_WORD;
4052 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
4053 if (profile_block_flag == 2)
4055 FUNCTION_BLOCK_PROFILER_EXIT;
4057 #endif
  /* If we're only restoring one register and sp is not valid, then
     use a move instruction to restore the register, since it's less work
     than reloading sp and popping the register.

     The default code results in a stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well, especially when there are no registers to restore.  We also use
     this code when TARGET_USE_LEAVE and there is exactly one register to
     pop.  This heuristic may need some tuning in the future.  */
4069 if ((!sp_valid && frame.nregs <= 1)
4070 || (TARGET_EPILOGUE_USING_MOVE
4071 && use_fast_prologue_epilogue
4072 && (frame.nregs > 1 || frame.to_allocate))
4073 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4074 || (frame_pointer_needed && TARGET_USE_LEAVE
4075 && use_fast_prologue_epilogue && frame.nregs == 1)
4076 || current_function_calls_eh_return)
      /* Restore registers.  We can use ebp or esp to address the memory
         locations.  If both are available, default to ebp, since offsets
         are known to be small.  The only exception is when esp points
         directly to the end of the block of saved registers, where we may
         simplify the addressing mode.  */
4084 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4085 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4086 frame.to_allocate, style == 2);
4087 else
4088 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4089 offset, style == 2);
4091 /* eh_return epilogues need %ecx added to the stack pointer. */
4092 if (style == 2)
4094 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4096 if (frame_pointer_needed)
4098 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4099 tmp = plus_constant (tmp, UNITS_PER_WORD);
4100 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4102 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4103 emit_move_insn (hard_frame_pointer_rtx, tmp);
4105 emit_insn (gen_pro_epilogue_adjust_stack
4106 (stack_pointer_rtx, sa, const0_rtx));
4108 else
4110 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4111 tmp = plus_constant (tmp, (frame.to_allocate
4112 + frame.nregs * UNITS_PER_WORD));
4113 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4116 else if (!frame_pointer_needed)
4117 emit_insn (gen_pro_epilogue_adjust_stack
4118 (stack_pointer_rtx, stack_pointer_rtx,
4119 GEN_INT (frame.to_allocate
4120 + frame.nregs * UNITS_PER_WORD)));
4121 /* If not an i386, mov & pop is faster than "leave". */
4122 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4123 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4124 else
4126 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4127 hard_frame_pointer_rtx,
4128 const0_rtx));
4129 if (TARGET_64BIT)
4130 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4131 else
4132 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4135 else
4137 /* First step is to deallocate the stack frame so that we can
4138 pop the registers. */
4139 if (!sp_valid)
4141 if (!frame_pointer_needed)
4142 abort ();
4143 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4144 hard_frame_pointer_rtx,
4145 GEN_INT (offset)));
4147 else if (frame.to_allocate)
4148 emit_insn (gen_pro_epilogue_adjust_stack
4149 (stack_pointer_rtx, stack_pointer_rtx,
4150 GEN_INT (frame.to_allocate)));
4152 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4153 if (ix86_save_reg (regno, false))
4155 if (TARGET_64BIT)
4156 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4157 else
4158 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4160 if (frame_pointer_needed)
      /* Leave results in shorter dependency chains on CPUs that are
         able to grok it fast.  */
4164 if (TARGET_USE_LEAVE)
4165 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4166 else if (TARGET_64BIT)
4167 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4168 else
4169 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4173 /* Sibcall epilogues don't want a return instruction. */
4174 if (style == 0)
4175 return;
4177 if (current_function_pops_args && current_function_args_size)
4179 rtx popc = GEN_INT (current_function_pops_args);
4181 /* i386 can only pop 64K bytes. If asked to pop more, pop
4182 return address, do explicit add, and jump indirectly to the
4183 caller. */
4185 if (current_function_pops_args >= 65536)
4187 rtx ecx = gen_rtx_REG (SImode, 2);
          /* There is no "pascal" calling convention in the 64-bit ABI.  */
          if (TARGET_64BIT)
            abort ();
4193 emit_insn (gen_popsi1 (ecx));
4194 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4195 emit_jump_insn (gen_return_indirect_internal (ecx));
4197 else
4198 emit_jump_insn (gen_return_pop_internal (popc));
4200 else
4201 emit_jump_insn (gen_return_internal ());
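/* Likewise a sketch of common 32-bit epilogues produced above:

     popl %ebx               ; restore saved registers
     movl %ebp, %esp         ; or a single "leave" when
     popl %ebp               ; TARGET_USE_LEAVE is profitable
     ret

   Callee-pop functions instead end in "ret $N"; if N >= 64K the
   return address is popped into %ecx, %esp adjusted explicitly, and
   an indirect jump through %ecx emitted.  */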
4204 /* Extract the parts of an RTL expression that is a valid memory address
4205 for an instruction. Return false if the structure of the address is
4206 grossly off. */
4208 static int
4209 ix86_decompose_address (addr, out)
4210 register rtx addr;
4211 struct ix86_address *out;
4213 rtx base = NULL_RTX;
4214 rtx index = NULL_RTX;
4215 rtx disp = NULL_RTX;
4216 HOST_WIDE_INT scale = 1;
4217 rtx scale_rtx = NULL_RTX;
4219 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4220 base = addr;
4221 else if (GET_CODE (addr) == PLUS)
4223 rtx op0 = XEXP (addr, 0);
4224 rtx op1 = XEXP (addr, 1);
4225 enum rtx_code code0 = GET_CODE (op0);
4226 enum rtx_code code1 = GET_CODE (op1);
4228 if (code0 == REG || code0 == SUBREG)
4230 if (code1 == REG || code1 == SUBREG)
4231 index = op0, base = op1; /* index + base */
4232 else
4233 base = op0, disp = op1; /* base + displacement */
4235 else if (code0 == MULT)
4237 index = XEXP (op0, 0);
4238 scale_rtx = XEXP (op0, 1);
4239 if (code1 == REG || code1 == SUBREG)
4240 base = op1; /* index*scale + base */
4241 else
4242 disp = op1; /* index*scale + disp */
4244 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4246 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4247 scale_rtx = XEXP (XEXP (op0, 0), 1);
4248 base = XEXP (op0, 1);
4249 disp = op1;
4251 else if (code0 == PLUS)
4253 index = XEXP (op0, 0); /* index + base + disp */
4254 base = XEXP (op0, 1);
4255 disp = op1;
4257 else
4258 return FALSE;
4260 else if (GET_CODE (addr) == MULT)
4262 index = XEXP (addr, 0); /* index*scale */
4263 scale_rtx = XEXP (addr, 1);
4265 else if (GET_CODE (addr) == ASHIFT)
4267 rtx tmp;
4269 /* We're called for lea too, which implements ashift on occasion. */
4270 index = XEXP (addr, 0);
4271 tmp = XEXP (addr, 1);
4272 if (GET_CODE (tmp) != CONST_INT)
4273 return FALSE;
4274 scale = INTVAL (tmp);
4275 if ((unsigned HOST_WIDE_INT) scale > 3)
4276 return FALSE;
4277 scale = 1 << scale;
4279 else
4280 disp = addr; /* displacement */
4282 /* Extract the integral value of scale. */
4283 if (scale_rtx)
4285 if (GET_CODE (scale_rtx) != CONST_INT)
4286 return FALSE;
4287 scale = INTVAL (scale_rtx);
  /* Allow the arg pointer and stack pointer as the index if there is
     no scaling.  */
4291 if (base && index && scale == 1
4292 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4293 || index == stack_pointer_rtx))
4295 rtx tmp = base;
4296 base = index;
4297 index = tmp;
4300 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4301 if ((base == hard_frame_pointer_rtx
4302 || base == frame_pointer_rtx
4303 || base == arg_pointer_rtx) && !disp)
4304 disp = const0_rtx;
  /* Special case: on K6, [%esi] causes the instruction to be vector
     decoded.  Avoid this by transforming to [%esi+0].  */
4308 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4309 && base && !index && !disp
4310 && REG_P (base)
4311 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4312 disp = const0_rtx;
4314 /* Special case: encode reg+reg instead of reg*2. */
4315 if (!base && index && scale && scale == 2)
4316 base = index, scale = 1;
4318 /* Special case: scaling cannot be encoded without base or displacement. */
4319 if (!base && !disp && index && scale != 1)
4320 disp = const0_rtx;
4322 out->base = base;
4323 out->index = index;
4324 out->disp = disp;
4325 out->scale = scale;
4327 return TRUE;
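/* Illustrative decompositions (sketches, not exhaustive):

     (plus (reg) (const_int 4))
        -> base = reg, disp = 4
     (plus (mult (reg) (const_int 4)) (reg))
        -> index = reg, scale = 4, base = reg
     (plus (plus (mult (reg) (const_int 2)) (reg)) (const_int 8))
        -> index = reg, scale = 2, base = reg, disp = 8

   mirroring the hardware's [base + index*scale + disp] form.  */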
/* Return the cost of the memory address X.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */

int
ix86_address_cost (x)
4337 rtx x;
4339 struct ix86_address parts;
4340 int cost = 1;
4342 if (!ix86_decompose_address (x, &parts))
4343 abort ();
4345 /* More complex memory references are better. */
4346 if (parts.disp && parts.disp != const0_rtx)
4347 cost--;
4349 /* Attempt to minimize number of registers in the address. */
4350 if ((parts.base
4351 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4352 || (parts.index
4353 && (!REG_P (parts.index)
4354 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4355 cost++;
4357 if (parts.base
4358 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4359 && parts.index
4360 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4361 && parts.base != parts.index)
4362 cost++;
  /* The AMD-K6 doesn't like addresses with the ModR/M byte set to
     00_xxx_100b, since its predecode logic can't detect the length of
     such instructions and decoding degenerates to vector decoding.
     Increase the cost of such addresses here.  The penalty is at least
     2 cycles.  It may be worthwhile to split such addresses or even
     refuse them entirely.

     The following addressing modes are affected:
        [base+scale*index]
        [scale*index+disp]
        [base+index]

     The first and last cases may be avoidable by explicitly coding the
     zero into the memory address, but I don't have an AMD-K6 machine
     handy to check this theory.  */
4379 if (TARGET_K6
4380 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4381 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4382 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4383 cost += 10;
4385 return cost;
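/* Illustrative costs under the rules above, ignoring the K6 penalty
   (hypothetical operands; "pseudo" means a not-yet-allocated register):

     [hardreg + 12]        -> 1 - 1     = 0
     [hardreg]             -> 1         = 1
     [pseudo]              -> 1 + 1     = 2
     [pseudo1 + pseudo2*4] -> 1 + 1 + 1 = 3

   so addresses needing extra registers with long lifetimes look more
   expensive.  */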
4388 /* If X is a machine specific address (i.e. a symbol or label being
4389 referenced as a displacement from the GOT implemented using an
4390 UNSPEC), then return the base term. Otherwise return X. */
rtx
ix86_find_base_term (x)
4394 rtx x;
4396 rtx term;
4398 if (TARGET_64BIT)
4400 if (GET_CODE (x) != CONST)
4401 return x;
4402 term = XEXP (x, 0);
4403 if (GET_CODE (term) == PLUS
4404 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4405 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4406 term = XEXP (term, 0);
4407 if (GET_CODE (term) != UNSPEC
4408 || XVECLEN (term, 0) != 1
4409 || XINT (term, 1) != 15)
4410 return x;
4412 term = XVECEXP (term, 0, 0);
4414 if (GET_CODE (term) != SYMBOL_REF
4415 && GET_CODE (term) != LABEL_REF)
4416 return x;
4418 return term;
4421 if (GET_CODE (x) != PLUS
4422 || XEXP (x, 0) != pic_offset_table_rtx
4423 || GET_CODE (XEXP (x, 1)) != CONST)
4424 return x;
4426 term = XEXP (XEXP (x, 1), 0);
4428 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4429 term = XEXP (term, 0);
4431 if (GET_CODE (term) != UNSPEC
4432 || XVECLEN (term, 0) != 1
4433 || XINT (term, 1) != 7)
4434 return x;
4436 term = XVECEXP (term, 0, 0);
4438 if (GET_CODE (term) != SYMBOL_REF
4439 && GET_CODE (term) != LABEL_REF)
4440 return x;
4442 return term;
4445 /* Determine if a given CONST RTX is a valid memory displacement
4446 in PIC mode. */
int
legitimate_pic_address_disp_p (disp)
4450 register rtx disp;
4452 /* In 64bit mode we can allow direct addresses of symbols and labels
4453 when they are not dynamic symbols. */
4454 if (TARGET_64BIT)
4456 rtx x = disp;
4457 if (GET_CODE (disp) == CONST)
4458 x = XEXP (disp, 0);
4459 /* ??? Handle PIC code models */
4460 if (GET_CODE (x) == PLUS
4461 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4462 && ix86_cmodel == CM_SMALL_PIC
4463 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4464 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4465 x = XEXP (x, 0);
4466 if (local_symbolic_operand (x, Pmode))
4467 return 1;
4469 if (GET_CODE (disp) != CONST)
4470 return 0;
4471 disp = XEXP (disp, 0);
4473 if (TARGET_64BIT)
      /* It is unsafe to allow PLUS expressions; this limits the allowed
         distance into GOT tables.  We should not need these anyway.  */
4477 if (GET_CODE (disp) != UNSPEC
4478 || XVECLEN (disp, 0) != 1
4479 || XINT (disp, 1) != 15)
4480 return 0;
4482 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4483 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4484 return 0;
4485 return 1;
4488 if (GET_CODE (disp) == PLUS)
4490 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4491 return 0;
4492 disp = XEXP (disp, 0);
4495 if (GET_CODE (disp) != UNSPEC
4496 || XVECLEN (disp, 0) != 1)
4497 return 0;
4499 /* Must be @GOT or @GOTOFF. */
4500 switch (XINT (disp, 1))
4502 case 6: /* @GOT */
4503 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4505 case 7: /* @GOTOFF */
4506 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4509 return 0;
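/* For illustration, the 32-bit displacements accepted above look like
   (sketches):

     (const (unspec [(symbol_ref "sym")] 6))         ; sym@GOT
     (const (plus (unspec [(symbol_ref "sym")] 7)
                  (const_int 4)))                    ; sym@GOTOFF+4

   where the @GOTOFF form additionally requires a local symbol.  */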
4512 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4513 memory address for an instruction. The MODE argument is the machine mode
4514 for the MEM expression that wants to use this address.
   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
4517 convert common non-canonical forms to canonical form so that they will
4518 be recognized. */
int
legitimate_address_p (mode, addr, strict)
4522 enum machine_mode mode;
4523 register rtx addr;
4524 int strict;
4526 struct ix86_address parts;
4527 rtx base, index, disp;
4528 HOST_WIDE_INT scale;
4529 const char *reason = NULL;
4530 rtx reason_rtx = NULL_RTX;
4532 if (TARGET_DEBUG_ADDR)
4534 fprintf (stderr,
4535 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4536 GET_MODE_NAME (mode), strict);
4537 debug_rtx (addr);
4540 if (! ix86_decompose_address (addr, &parts))
4542 reason = "decomposition failed";
4543 goto report_error;
4546 base = parts.base;
4547 index = parts.index;
4548 disp = parts.disp;
4549 scale = parts.scale;
  /* Validate the base register.

     Don't allow SUBREGs here; they can lead to spill failures when the
     base is one word out of a two-word structure, which is represented
     internally as a DImode int.  */
4557 if (base)
4559 reason_rtx = base;
4561 if (GET_CODE (base) != REG)
4563 reason = "base is not a register";
4564 goto report_error;
4567 if (GET_MODE (base) != Pmode)
4569 reason = "base is not in Pmode";
4570 goto report_error;
4573 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
4574 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
4576 reason = "base is not valid";
4577 goto report_error;
  /* Validate the index register.

     Don't allow SUBREGs here; they can lead to spill failures when the
     index is one word out of a two-word structure, which is represented
     internally as a DImode int.  */
4587 if (index)
4589 reason_rtx = index;
4591 if (GET_CODE (index) != REG)
4593 reason = "index is not a register";
4594 goto report_error;
4597 if (GET_MODE (index) != Pmode)
4599 reason = "index is not in Pmode";
4600 goto report_error;
4603 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
4604 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
4606 reason = "index is not valid";
4607 goto report_error;
4611 /* Validate scale factor. */
4612 if (scale != 1)
4614 reason_rtx = GEN_INT (scale);
4615 if (!index)
4617 reason = "scale without index";
4618 goto report_error;
4621 if (scale != 2 && scale != 4 && scale != 8)
4623 reason = "scale is not a valid multiplier";
4624 goto report_error;
4628 /* Validate displacement. */
4629 if (disp)
4631 reason_rtx = disp;
4633 if (!CONSTANT_ADDRESS_P (disp))
4635 reason = "displacement is not constant";
4636 goto report_error;
4639 if (TARGET_64BIT)
4641 if (!x86_64_sign_extended_value (disp))
4643 reason = "displacement is out of range";
4644 goto report_error;
4647 else
4649 if (GET_CODE (disp) == CONST_DOUBLE)
4651 reason = "displacement is a const_double";
4652 goto report_error;
4656 if (flag_pic && SYMBOLIC_CONST (disp))
4658 if (TARGET_64BIT && (index || base))
4660 reason = "non-constant pic memory reference";
4661 goto report_error;
4663 if (! legitimate_pic_address_disp_p (disp))
4665 reason = "displacement is an invalid pic construct";
4666 goto report_error;
          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is a good idea, unfortunately these constructs may
             be created by the "adds using lea" optimization for incorrect
             code like:

               int a;
               int foo(int i)
                 {
                   return *(&a+i);
                 }

             This code is nonsensical, but results in addressing the GOT
             table with pic_offset_table_rtx as the base.  We can't just
             refuse it easily, since it gets matched by the "addsi3"
             pattern, which later gets split to an lea in the case the
             output register differs from the input.  While this can be
             handled by a separate addsi pattern for this case that never
             results in an lea, disabling this test seems to be the easier
             and correct fix for the crash.  */
4691 else if (HALF_PIC_P ())
4693 if (! HALF_PIC_ADDRESS_P (disp)
4694 || (base != NULL_RTX || index != NULL_RTX))
4696 reason = "displacement is an invalid half-pic reference";
4697 goto report_error;
4702 /* Everything looks valid. */
4703 if (TARGET_DEBUG_ADDR)
4704 fprintf (stderr, "Success.\n");
4705 return TRUE;
4707 report_error:
4708 if (TARGET_DEBUG_ADDR)
4710 fprintf (stderr, "Error: %s\n", reason);
4711 debug_rtx (reason_rtx);
4713 return FALSE;
/* Return a unique alias set for the GOT.  */
4718 static HOST_WIDE_INT
4719 ix86_GOT_alias_set ()
4721 static HOST_WIDE_INT set = -1;
4722 if (set == -1)
4723 set = new_alias_set ();
4724 return set;
4727 /* Return a legitimate reference for ORIG (an address) using the
4728 register REG. If REG is 0, a new pseudo is generated.
4730 There are two types of references that must be handled:
4732 1. Global data references must load the address from the GOT, via
4733 the PIC reg. An insn is emitted to do this load, and the reg is
4734 returned.
4736 2. Static data references, constant pool addresses, and code labels
4737 compute the address as an offset from the GOT, whose base is in
4738 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
4739 differentiate them from global data objects. The returned
4740 address is the PIC reg + an unspec constant.
4742 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
4743 reg also appears in the address. */
rtx
legitimize_pic_address (orig, reg)
4747 rtx orig;
4748 rtx reg;
4750 rtx addr = orig;
4751 rtx new = orig;
4752 rtx base;
4754 if (local_symbolic_operand (addr, Pmode))
4756 /* In 64bit mode we can address such objects directly. */
4757 if (TARGET_64BIT)
4758 new = addr;
4759 else
4761 /* This symbol may be referenced via a displacement from the PIC
4762 base address (@GOTOFF). */
4764 current_function_uses_pic_offset_table = 1;
4765 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
4766 new = gen_rtx_CONST (Pmode, new);
4767 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4769 if (reg != 0)
4771 emit_move_insn (reg, new);
4772 new = reg;
4776 else if (GET_CODE (addr) == SYMBOL_REF)
4778 if (TARGET_64BIT)
4780 current_function_uses_pic_offset_table = 1;
4781 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15);
4782 new = gen_rtx_CONST (Pmode, new);
4783 new = gen_rtx_MEM (Pmode, new);
4784 RTX_UNCHANGING_P (new) = 1;
4785 set_mem_alias_set (new, ix86_GOT_alias_set ());
4787 if (reg == 0)
4788 reg = gen_reg_rtx (Pmode);
      /* Use gen_movsi directly; otherwise the address gets loaded into
         a register for CSE.  We don't want to CSE these addresses;
         instead we CSE addresses from the GOT table, so skip this.  */
4792 emit_insn (gen_movsi (reg, new));
4793 new = reg;
4795 else
4797 /* This symbol must be referenced via a load from the
4798 Global Offset Table (@GOT). */
4800 current_function_uses_pic_offset_table = 1;
4801 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
4802 new = gen_rtx_CONST (Pmode, new);
4803 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4804 new = gen_rtx_MEM (Pmode, new);
4805 RTX_UNCHANGING_P (new) = 1;
4806 set_mem_alias_set (new, ix86_GOT_alias_set ());
4808 if (reg == 0)
4809 reg = gen_reg_rtx (Pmode);
4810 emit_move_insn (reg, new);
4811 new = reg;
4814 else
4816 if (GET_CODE (addr) == CONST)
4818 addr = XEXP (addr, 0);
4819 if (GET_CODE (addr) == UNSPEC)
4821 /* Check that the unspec is one of the ones we generate? */
4823 else if (GET_CODE (addr) != PLUS)
4824 abort ();
4826 if (GET_CODE (addr) == PLUS)
4828 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
4830 /* Check first to see if this is a constant offset from a @GOTOFF
4831 symbol reference. */
4832 if (local_symbolic_operand (op0, Pmode)
4833 && GET_CODE (op1) == CONST_INT)
4835 if (!TARGET_64BIT)
4837 current_function_uses_pic_offset_table = 1;
4838 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
4839 new = gen_rtx_PLUS (Pmode, new, op1);
4840 new = gen_rtx_CONST (Pmode, new);
4841 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4843 if (reg != 0)
4845 emit_move_insn (reg, new);
4846 new = reg;
4849 else
4851 /* ??? We need to limit offsets here. */
4854 else
4856 base = legitimize_pic_address (XEXP (addr, 0), reg);
4857 new = legitimize_pic_address (XEXP (addr, 1),
4858 base == reg ? NULL_RTX : reg);
4860 if (GET_CODE (new) == CONST_INT)
4861 new = plus_constant (base, INTVAL (new));
4862 else
4864 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
4866 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
4867 new = XEXP (new, 1);
4869 new = gen_rtx_PLUS (Pmode, base, new);
4874 return new;
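/* A sketch of the 32-bit cases above, with hypothetical symbols:

     local "loc"   ->  %ebx + (const (unspec [loc] 7))           ; loc@GOTOFF
     global "glob" ->  (mem (%ebx + (const (unspec [glob] 6))))  ; @GOT load

   where %ebx stands in for pic_offset_table_rtx.  */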
4877 /* Try machine-dependent ways of modifying an illegitimate address
4878 to be legitimate. If we find one, return the new, valid address.
4879 This macro is used in only one place: `memory_address' in explow.c.
4881 OLDX is the address as it was before break_out_memory_refs was called.
4882 In some cases it is useful to look at this to decide what needs to be done.
4884 MODE and WIN are passed so that this macro can use
4885 GO_IF_LEGITIMATE_ADDRESS.
4887 It is always safe for this macro to do nothing. It exists to recognize
4888 opportunities to optimize the output.
4890 For the 80386, we handle X+REG by loading X into a register R and
4891 using R+REG. R will go in a general reg and indexing will be used.
4892 However, if REG is a broken-out memory address or multiplication,
4893 nothing needs to be done because REG can certainly go in a general reg.
4895 When -fpic is used, special handling is needed for symbolic references.
4896 See comments by legitimize_pic_address in i386.c for details. */
rtx
legitimize_address (x, oldx, mode)
4900 register rtx x;
4901 register rtx oldx ATTRIBUTE_UNUSED;
4902 enum machine_mode mode;
4904 int changed = 0;
4905 unsigned log;
4907 if (TARGET_DEBUG_ADDR)
4909 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
4910 GET_MODE_NAME (mode));
4911 debug_rtx (x);
4914 if (flag_pic && SYMBOLIC_CONST (x))
4915 return legitimize_pic_address (x, 0);
4917 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
4918 if (GET_CODE (x) == ASHIFT
4919 && GET_CODE (XEXP (x, 1)) == CONST_INT
4920 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
4922 changed = 1;
4923 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
4924 GEN_INT (1 << log));
4927 if (GET_CODE (x) == PLUS)
4929 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
4931 if (GET_CODE (XEXP (x, 0)) == ASHIFT
4932 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4933 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
4935 changed = 1;
4936 XEXP (x, 0) = gen_rtx_MULT (Pmode,
4937 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
4938 GEN_INT (1 << log));
4941 if (GET_CODE (XEXP (x, 1)) == ASHIFT
4942 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4943 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
4945 changed = 1;
4946 XEXP (x, 1) = gen_rtx_MULT (Pmode,
4947 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
4948 GEN_INT (1 << log));
4951 /* Put multiply first if it isn't already. */
4952 if (GET_CODE (XEXP (x, 1)) == MULT)
4954 rtx tmp = XEXP (x, 0);
4955 XEXP (x, 0) = XEXP (x, 1);
4956 XEXP (x, 1) = tmp;
4957 changed = 1;
4960 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
4961 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
4962 created by virtual register instantiation, register elimination, and
4963 similar optimizations. */
4964 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
4966 changed = 1;
4967 x = gen_rtx_PLUS (Pmode,
4968 gen_rtx_PLUS (Pmode, XEXP (x, 0),
4969 XEXP (XEXP (x, 1), 0)),
4970 XEXP (XEXP (x, 1), 1));
4973 /* Canonicalize
4974 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
4975 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
4976 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
4977 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4978 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
4979 && CONSTANT_P (XEXP (x, 1)))
4981 rtx constant;
4982 rtx other = NULL_RTX;
4984 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4986 constant = XEXP (x, 1);
4987 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
4989 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
4991 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
4992 other = XEXP (x, 1);
4994 else
4995 constant = 0;
4997 if (constant)
4999 changed = 1;
5000 x = gen_rtx_PLUS (Pmode,
5001 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5002 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5003 plus_constant (other, INTVAL (constant)));
5007 if (changed && legitimate_address_p (mode, x, FALSE))
5008 return x;
5010 if (GET_CODE (XEXP (x, 0)) == MULT)
5012 changed = 1;
5013 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5016 if (GET_CODE (XEXP (x, 1)) == MULT)
5018 changed = 1;
5019 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5022 if (changed
5023 && GET_CODE (XEXP (x, 1)) == REG
5024 && GET_CODE (XEXP (x, 0)) == REG)
5025 return x;
5027 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5029 changed = 1;
5030 x = legitimize_pic_address (x, 0);
5033 if (changed && legitimate_address_p (mode, x, FALSE))
5034 return x;
5036 if (GET_CODE (XEXP (x, 0)) == REG)
5038 register rtx temp = gen_reg_rtx (Pmode);
5039 register rtx val = force_operand (XEXP (x, 1), temp);
5040 if (val != temp)
5041 emit_move_insn (temp, val);
5043 XEXP (x, 1) = temp;
5044 return x;
5047 else if (GET_CODE (XEXP (x, 1)) == REG)
5049 register rtx temp = gen_reg_rtx (Pmode);
5050 register rtx val = force_operand (XEXP (x, 0), temp);
5051 if (val != temp)
5052 emit_move_insn (temp, val);
5054 XEXP (x, 0) = temp;
5055 return x;
5059 return x;
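/* For example, the shift canonicalization above rewrites

     (plus (ashift (reg) (const_int 2)) (reg))

   into

     (plus (mult (reg) (const_int 4)) (reg))

   which ix86_decompose_address then recognizes as index*scale + base.  */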
5062 /* Print an integer constant expression in assembler syntax. Addition
5063 and subtraction are the only arithmetic that may appear in these
5064 expressions. FILE is the stdio stream to write to, X is the rtx, and
5065 CODE is the operand print code from the output string. */
5067 static void
5068 output_pic_addr_const (file, x, code)
5069 FILE *file;
5070 rtx x;
5071 int code;
5073 char buf[256];
5075 switch (GET_CODE (x))
5077 case PC:
5078 if (flag_pic)
5079 putc ('.', file);
5080 else
5081 abort ();
5082 break;
5084 case SYMBOL_REF:
5085 assemble_name (file, XSTR (x, 0));
5086 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5087 fputs ("@PLT", file);
5088 break;
5090 case LABEL_REF:
5091 x = XEXP (x, 0);
5092 /* FALLTHRU */
5093 case CODE_LABEL:
5094 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5095 assemble_name (asm_out_file, buf);
5096 break;
5098 case CONST_INT:
5099 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5100 break;
5102 case CONST:
5103 /* This used to output parentheses around the expression,
5104 but that does not work on the 386 (either ATT or BSD assembler). */
5105 output_pic_addr_const (file, XEXP (x, 0), code);
5106 break;
5108 case CONST_DOUBLE:
5109 if (GET_MODE (x) == VOIDmode)
5111 /* We can use %d if the number is <32 bits and positive. */
5112 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5113 fprintf (file, "0x%lx%08lx",
5114 (unsigned long) CONST_DOUBLE_HIGH (x),
5115 (unsigned long) CONST_DOUBLE_LOW (x));
5116 else
5117 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5119 else
5120 /* We can't handle floating point constants;
5121 PRINT_OPERAND must handle them. */
5122 output_operand_lossage ("floating constant misused");
5123 break;
5125 case PLUS:
5126 /* Some assemblers need integer constants to appear first. */
5127 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5129 output_pic_addr_const (file, XEXP (x, 0), code);
5130 putc ('+', file);
5131 output_pic_addr_const (file, XEXP (x, 1), code);
5133 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5135 output_pic_addr_const (file, XEXP (x, 1), code);
5136 putc ('+', file);
5137 output_pic_addr_const (file, XEXP (x, 0), code);
5139 else
5140 abort ();
5141 break;
5143 case MINUS:
5144 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
5145 output_pic_addr_const (file, XEXP (x, 0), code);
5146 putc ('-', file);
5147 output_pic_addr_const (file, XEXP (x, 1), code);
5148 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
5149 break;
5151 case UNSPEC:
5152 if (XVECLEN (x, 0) != 1)
5153 abort ();
5154 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5155 switch (XINT (x, 1))
5157 case 6:
5158 fputs ("@GOT", file);
5159 break;
5160 case 7:
5161 fputs ("@GOTOFF", file);
5162 break;
5163 case 8:
5164 fputs ("@PLT", file);
5165 break;
5166 case 15:
5167 fputs ("@GOTPCREL(%RIP)", file);
5168 break;
5169 default:
5170 output_operand_lossage ("invalid UNSPEC as operand");
5171 break;
5173 break;
5175 default:
5176 output_operand_lossage ("invalid expression as operand");
5180 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5181 We need to handle our special PIC relocations. */
5183 void
5184 i386_dwarf_output_addr_const (file, x)
5185 FILE *file;
5186 rtx x;
5188 #ifdef ASM_QUAD
5189 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : INT_ASM_OP);
5190 #else
5191 if (TARGET_64BIT)
5192 abort ();
5193 fprintf (file, "%s", INT_ASM_OP);
5194 #endif
5195 if (flag_pic)
5196 output_pic_addr_const (file, x, '\0');
5197 else
5198 output_addr_const (file, x);
5199 fputc ('\n', file);
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.  */
rtx
i386_simplify_dwarf_addr (orig_x)
5208 rtx orig_x;
5210 rtx x = orig_x;
5212 if (TARGET_64BIT)
5214 if (GET_CODE (x) != CONST
5215 || GET_CODE (XEXP (x, 0)) != UNSPEC
5216 || XINT (XEXP (x, 0), 1) != 15)
5217 return orig_x;
5218 return XVECEXP (XEXP (x, 0), 0, 0);
5221 if (GET_CODE (x) != PLUS
5222 || GET_CODE (XEXP (x, 0)) != REG
5223 || GET_CODE (XEXP (x, 1)) != CONST)
5224 return orig_x;
5226 x = XEXP (XEXP (x, 1), 0);
5227 if (GET_CODE (x) == UNSPEC
5228 && (XINT (x, 1) == 6
5229 || XINT (x, 1) == 7))
5230 return XVECEXP (x, 0, 0);
5232 if (GET_CODE (x) == PLUS
5233 && GET_CODE (XEXP (x, 0)) == UNSPEC
5234 && GET_CODE (XEXP (x, 1)) == CONST_INT
5235 && (XINT (XEXP (x, 0), 1) == 6
5236 || XINT (XEXP (x, 0), 1) == 7))
5237 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5239 return orig_x;
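/* For instance, the 32-bit path above turns

     (plus (reg) (const (unspec [(symbol_ref "sym")] 7)))

   back into the bare (symbol_ref "sym"), undoing the @GOTOFF wrapping
   for debug output.  */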
5242 static void
5243 put_condition_code (code, mode, reverse, fp, file)
5244 enum rtx_code code;
5245 enum machine_mode mode;
5246 int reverse, fp;
5247 FILE *file;
5249 const char *suffix;
5251 if (mode == CCFPmode || mode == CCFPUmode)
5253 enum rtx_code second_code, bypass_code;
5254 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5255 if (bypass_code != NIL || second_code != NIL)
        abort ();
5257 code = ix86_fp_compare_code_to_integer (code);
5258 mode = CCmode;
5260 if (reverse)
5261 code = reverse_condition (code);
5263 switch (code)
5265 case EQ:
5266 suffix = "e";
5267 break;
5268 case NE:
5269 suffix = "ne";
5270 break;
5271 case GT:
5272 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
5273 abort ();
5274 suffix = "g";
5275 break;
5276 case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
         Those same assemblers have the same but opposite lossage on cmov.  */
5279 if (mode != CCmode)
5280 abort ();
5281 suffix = fp ? "nbe" : "a";
5282 break;
5283 case LT:
5284 if (mode == CCNOmode || mode == CCGOCmode)
5285 suffix = "s";
5286 else if (mode == CCmode || mode == CCGCmode)
5287 suffix = "l";
5288 else
5289 abort ();
5290 break;
5291 case LTU:
5292 if (mode != CCmode)
5293 abort ();
5294 suffix = "b";
5295 break;
5296 case GE:
5297 if (mode == CCNOmode || mode == CCGOCmode)
5298 suffix = "ns";
5299 else if (mode == CCmode || mode == CCGCmode)
5300 suffix = "ge";
5301 else
5302 abort ();
5303 break;
5304 case GEU:
5305 /* ??? As above. */
5306 if (mode != CCmode)
5307 abort ();
5308 suffix = fp ? "nb" : "ae";
5309 break;
5310 case LE:
5311 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
5312 abort ();
5313 suffix = "le";
5314 break;
5315 case LEU:
5316 if (mode != CCmode)
5317 abort ();
5318 suffix = "be";
5319 break;
5320 case UNORDERED:
5321 suffix = fp ? "u" : "p";
5322 break;
5323 case ORDERED:
5324 suffix = fp ? "nu" : "np";
5325 break;
5326 default:
5327 abort ();
5329 fputs (suffix, file);
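/* For example, (gt ...) in plain CCmode yields the suffix "g", so a
   template like "j%C1\t%l0" (as used by the conditional jump patterns)
   emits "jg"; the reversed form requested via %c would emit "jle"
   instead.  */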
5332 void
5333 print_reg (x, code, file)
5334 rtx x;
5335 int code;
5336 FILE *file;
5338 if (REGNO (x) == ARG_POINTER_REGNUM
5339 || REGNO (x) == FRAME_POINTER_REGNUM
5340 || REGNO (x) == FLAGS_REG
5341 || REGNO (x) == FPSR_REG)
5342 abort ();
5344 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
5345 putc ('%', file);
5347 if (code == 'w' || MMX_REG_P (x))
5348 code = 2;
5349 else if (code == 'b')
5350 code = 1;
5351 else if (code == 'k')
5352 code = 4;
5353 else if (code == 'q')
5354 code = 8;
5355 else if (code == 'y')
5356 code = 3;
5357 else if (code == 'h')
5358 code = 0;
5359 else
5360 code = GET_MODE_SIZE (GET_MODE (x));
  /* Irritatingly, AMD extended registers use a different naming
     convention from the normal registers.  */
5364 if (REX_INT_REG_P (x))
5366 if (!TARGET_64BIT)
5367 abort ();
5368 switch (code)
5370 case 0:
5371 error ("Extended registers have no high halves\n");
5372 break;
5373 case 1:
5374 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5375 break;
5376 case 2:
5377 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5378 break;
5379 case 4:
5380 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5381 break;
5382 case 8:
5383 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5384 break;
5385 default:
5386 error ("Unsupported operand size for extended register.\n");
5387 break;
5389 return;
5391 switch (code)
5393 case 3:
5394 if (STACK_TOP_P (x))
5396 fputs ("st(0)", file);
5397 break;
5399 /* FALLTHRU */
5400 case 8:
5401 case 4:
5402 case 12:
5403 if (! ANY_FP_REG_P (x))
5404 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5405 /* FALLTHRU */
5406 case 16:
5407 case 2:
5408 fputs (hi_reg_name[REGNO (x)], file);
5409 break;
5410 case 1:
5411 fputs (qi_reg_name[REGNO (x)], file);
5412 break;
5413 case 0:
5414 fputs (qi_high_reg_name[REGNO (x)], file);
5415 break;
5416 default:
5417 abort ();
5421 /* Meaning of CODE:
5422 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5423 C -- print opcode suffix for set/cmov insn.
5424 c -- like C, but print reversed condition
5425 F,f -- likewise, but for floating-point.
5426 R -- print the prefix for register names.
5427 z -- print the opcode suffix for the size of the current operand.
5428 * -- print a star (in certain assembler syntax)
5429 A -- print an absolute memory reference.
5430 w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
        delimiter.
5433 b -- print the QImode name of the register for the indicated operand.
5434 %b0 would print %al if operands[0] is reg 0.
5435 w -- likewise, print the HImode name of the register.
5436 k -- likewise, print the SImode name of the register.
5437 q -- likewise, print the DImode name of the register.
5438 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5439 y -- print "st(0)" instead of "st" as a register.
5440 D -- print condition for SSE cmp instruction.
5441 P -- if PIC, print an @PLT suffix.
5442 X -- don't print any sort of PIC '@' suffix for a symbol.
5445 void
5446 print_operand (file, x, code)
5447 FILE *file;
5448 rtx x;
5449 int code;
5451 if (code)
5453 switch (code)
5455 case '*':
5456 if (ASSEMBLER_DIALECT == 0)
5457 putc ('*', file);
5458 return;
5460 case 'A':
5461 if (ASSEMBLER_DIALECT == 0)
5462 putc ('*', file);
5463 else if (ASSEMBLER_DIALECT == 1)
          /* Intel syntax.  For absolute addresses, registers should not
             be surrounded by brackets.  */
5467 if (GET_CODE (x) != REG)
5469 putc ('[', file);
5470 PRINT_OPERAND (file, x, 0);
5471 putc (']', file);
5472 return;
5476 PRINT_OPERAND (file, x, 0);
5477 return;
5480 case 'L':
5481 if (ASSEMBLER_DIALECT == 0)
5482 putc ('l', file);
5483 return;
5485 case 'W':
5486 if (ASSEMBLER_DIALECT == 0)
5487 putc ('w', file);
5488 return;
5490 case 'B':
5491 if (ASSEMBLER_DIALECT == 0)
5492 putc ('b', file);
5493 return;
5495 case 'Q':
5496 if (ASSEMBLER_DIALECT == 0)
5497 putc ('l', file);
5498 return;
5500 case 'S':
5501 if (ASSEMBLER_DIALECT == 0)
5502 putc ('s', file);
5503 return;
5505 case 'T':
5506 if (ASSEMBLER_DIALECT == 0)
5507 putc ('t', file);
5508 return;
5510 case 'z':
5511 /* 387 opcodes don't get size suffixes if the operands are
5512 registers. */
5514 if (STACK_REG_P (x))
5515 return;
      /* This is the opcode suffix derived from the operand's size.  */
5518 switch (GET_MODE_SIZE (GET_MODE (x)))
5520 case 2:
5521 #ifdef HAVE_GAS_FILDS_FISTS
5522 putc ('s', file);
5523 #endif
5524 return;
5526 case 4:
5527 if (GET_MODE (x) == SFmode)
5529 putc ('s', file);
5530 return;
5532 else
5533 putc ('l', file);
5534 return;
5536 case 12:
5537 case 16:
5538 putc ('t', file);
5539 return;
5541 case 8:
5542 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5544 #ifdef GAS_MNEMONICS
5545 putc ('q', file);
5546 #else
5547 putc ('l', file);
5548 putc ('l', file);
5549 #endif
5551 else
5552 putc ('l', file);
5553 return;
5555 default:
5556 abort ();
5559 case 'b':
5560 case 'w':
5561 case 'k':
5562 case 'q':
5563 case 'h':
5564 case 'y':
5565 case 'X':
5566 case 'P':
5567 break;
5569 case 's':
5570 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5572 PRINT_OPERAND (file, x, 0);
5573 putc (',', file);
5575 return;
5577 case 'D':
          /* A little bit of brain damage here: the SSE compare instructions
             use completely different names for the comparisons than the fp
             conditional moves do.  */
5581 switch (GET_CODE (x))
5583 case EQ:
5584 case UNEQ:
5585 fputs ("eq", file);
5586 break;
5587 case LT:
5588 case UNLT:
5589 fputs ("lt", file);
5590 break;
5591 case LE:
5592 case UNLE:
5593 fputs ("le", file);
5594 break;
5595 case UNORDERED:
5596 fputs ("unord", file);
5597 break;
5598 case NE:
5599 case LTGT:
5600 fputs ("neq", file);
5601 break;
5602 case UNGE:
5603 case GE:
5604 fputs ("nlt", file);
5605 break;
5606 case UNGT:
5607 case GT:
5608 fputs ("nle", file);
5609 break;
5610 case ORDERED:
5611 fputs ("ord", file);
5612 break;
5613 default:
5614 abort ();
5615 break;
5617 return;
5618 case 'C':
5619 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
5620 return;
5621 case 'F':
5622 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
5623 return;
5625 /* Like above, but reverse condition */
5626 case 'c':
5627 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5628 return;
5629 case 'f':
5630 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
5631 return;
5632 case '+':
5634 rtx x;
5636 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5637 return;
5639 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5640 if (x)
5642 int pred_val = INTVAL (XEXP (x, 0));
5644 if (pred_val < REG_BR_PROB_BASE * 45 / 100
5645 || pred_val > REG_BR_PROB_BASE * 55 / 100)
5647 int taken = pred_val > REG_BR_PROB_BASE / 2;
5648 int cputaken = final_forward_branch_p (current_output_insn) == 0;
              /* Emit hints only in the case the default branch prediction
                 heuristics would fail.  */
5652 if (taken != cputaken)
5654 /* We use 3e (DS) prefix for taken branches and
5655 2e (CS) prefix for not taken branches. */
5656 if (taken)
5657 fputs ("ds ; ", file);
5658 else
5659 fputs ("cs ; ", file);
5663 return;
5665 default:
5667 char str[50];
5668 sprintf (str, "invalid operand code `%c'", code);
5669 output_operand_lossage (str);
5674 if (GET_CODE (x) == REG)
5676 PRINT_REG (x, code, file);
5679 else if (GET_CODE (x) == MEM)
5681 /* No `byte ptr' prefix for call instructions. */
5682 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
5684 const char * size;
5685 switch (GET_MODE_SIZE (GET_MODE (x)))
5687 case 1: size = "BYTE"; break;
5688 case 2: size = "WORD"; break;
5689 case 4: size = "DWORD"; break;
5690 case 8: size = "QWORD"; break;
5691 case 12: size = "XWORD"; break;
5692 case 16: size = "XMMWORD"; break;
5693 default:
5694 abort ();
5697 /* Check for explicit size override (codes 'b', 'w' and 'k') */
5698 if (code == 'b')
5699 size = "BYTE";
5700 else if (code == 'w')
5701 size = "WORD";
5702 else if (code == 'k')
5703 size = "DWORD";
5705 fputs (size, file);
5706 fputs (" PTR ", file);
5709 x = XEXP (x, 0);
5710 if (flag_pic && CONSTANT_ADDRESS_P (x))
5711 output_pic_addr_const (file, x, code);
5712 /* Avoid (%rip) for call operands. */
5713 else if (CONSTANT_ADDRESS_P (x) && code =='P'
5714 && GET_CODE (x) != CONST_INT)
5715 output_addr_const (file, x);
5716 else
5717 output_address (x);
5720 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
5722 REAL_VALUE_TYPE r;
5723 long l;
5725 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5726 REAL_VALUE_TO_TARGET_SINGLE (r, l);
5728 if (ASSEMBLER_DIALECT == 0)
5729 putc ('$', file);
5730 fprintf (file, "0x%lx", l);
5733 /* These float cases don't actually occur as immediate operands. */
5734 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5736 REAL_VALUE_TYPE r;
5737 char dstr[30];
5739 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5740 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5741 fprintf (file, "%s", dstr);
5744 else if (GET_CODE (x) == CONST_DOUBLE
5745 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
5747 REAL_VALUE_TYPE r;
5748 char dstr[30];
5750 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5751 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5752 fprintf (file, "%s", dstr);
5754 else
5756 if (code != 'P')
5758 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
5760 if (ASSEMBLER_DIALECT == 0)
5761 putc ('$', file);
5763 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
5764 || GET_CODE (x) == LABEL_REF)
5766 if (ASSEMBLER_DIALECT == 0)
5767 putc ('$', file);
5768 else
5769 fputs ("OFFSET FLAT:", file);
5772 if (GET_CODE (x) == CONST_INT)
5773 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5774 else if (flag_pic)
5775 output_pic_addr_const (file, x, code);
5776 else
5777 output_addr_const (file, x);
5781 /* Print a memory operand whose address is ADDR. */
5783 void
5784 print_operand_address (file, addr)
5785 FILE *file;
5786 register rtx addr;
5788 struct ix86_address parts;
5789 rtx base, index, disp;
5790 int scale;
5792 if (! ix86_decompose_address (addr, &parts))
5793 abort ();
5795 base = parts.base;
5796 index = parts.index;
5797 disp = parts.disp;
5798 scale = parts.scale;
5800 if (!base && !index)
      /* A displacement-only address requires special attention.  */
5804 if (GET_CODE (disp) == CONST_INT)
5806 if (ASSEMBLER_DIALECT != 0)
5808 if (USER_LABEL_PREFIX[0] == 0)
5809 putc ('%', file);
5810 fputs ("ds:", file);
5812 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
5814 else if (flag_pic)
5815 output_pic_addr_const (file, addr, 0);
5816 else
5817 output_addr_const (file, addr);
      /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode.  */
5820 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
5821 fputs ("(%rip)", file);
5823 else
5825 if (ASSEMBLER_DIALECT == 0)
5827 if (disp)
5829 if (flag_pic)
5830 output_pic_addr_const (file, disp, 0);
5831 else if (GET_CODE (disp) == LABEL_REF)
5832 output_asm_label (disp);
5833 else
5834 output_addr_const (file, disp);
5837 putc ('(', file);
5838 if (base)
5839 PRINT_REG (base, 0, file);
5840 if (index)
5842 putc (',', file);
5843 PRINT_REG (index, 0, file);
5844 if (scale != 1)
5845 fprintf (file, ",%d", scale);
5847 putc (')', file);
5849 else
5851 rtx offset = NULL_RTX;
5853 if (disp)
5855 /* Pull out the offset of a symbol; print any symbol itself. */
5856 if (GET_CODE (disp) == CONST
5857 && GET_CODE (XEXP (disp, 0)) == PLUS
5858 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
5860 offset = XEXP (XEXP (disp, 0), 1);
5861 disp = gen_rtx_CONST (VOIDmode,
5862 XEXP (XEXP (disp, 0), 0));
5865 if (flag_pic)
5866 output_pic_addr_const (file, disp, 0);
5867 else if (GET_CODE (disp) == LABEL_REF)
5868 output_asm_label (disp);
5869 else if (GET_CODE (disp) == CONST_INT)
5870 offset = disp;
5871 else
5872 output_addr_const (file, disp);
5875 putc ('[', file);
5876 if (base)
5878 PRINT_REG (base, 0, file);
5879 if (offset)
5881 if (INTVAL (offset) >= 0)
5882 putc ('+', file);
5883 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5886 else if (offset)
5887 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5888 else
5889 putc ('0', file);
5891 if (index)
5893 putc ('+', file);
5894 PRINT_REG (index, 0, file);
5895 if (scale != 1)
5896 fprintf (file, "*%d", scale);
5898 putc (']', file);
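/* For example (a sketch), the parts base = %ebp, index = %ecx,
   scale = 4, disp = -4 print as

     -4(%ebp,%ecx,4)     in AT&T syntax, and
     [ebp-4+ecx*4]       in Intel syntax.  */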
5903 /* Split one or more DImode RTL references into pairs of SImode
5904 references. The RTL can be REG, offsettable MEM, integer constant, or
5905 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
5906 split and "num" is its length. lo_half and hi_half are output arrays
5907 that parallel "operands". */
5909 void
5910 split_di (operands, num, lo_half, hi_half)
5911 rtx operands[];
5912 int num;
5913 rtx lo_half[], hi_half[];
5915 while (num--)
5917 rtx op = operands[num];
5918 if (CONSTANT_P (op))
5919 split_double (op, &lo_half[num], &hi_half[num]);
5920 else if (! reload_completed)
5922 lo_half[num] = gen_lowpart (SImode, op);
5923 hi_half[num] = gen_highpart (SImode, op);
5925 else if (GET_CODE (op) == REG)
          if (TARGET_64BIT)
            abort ();
5929 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
5930 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
5932 else if (offsettable_memref_p (op))
5934 lo_half[num] = adjust_address (op, SImode, 0);
5935 hi_half[num] = adjust_address (op, SImode, 4);
5937 else
5938 abort ();
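/* A quick sketch: after reload, a DImode hard register such as
   (reg:DI 0) splits into lo_half = (reg:SI 0) and hi_half = (reg:SI 1),
   i.e. a DImode value held in the %eax/%edx pair splits into those two
   SImode registers.  */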
5942 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
5943 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
5944 is the expression of the binary operation. The output may either be
5945 emitted here, or returned to the caller, like all output_* functions.
5947 There is no guarantee that the operands are the same mode, as they
5948 might be within FLOAT or FLOAT_EXTEND expressions. */
5950 #ifndef SYSV386_COMPAT
5951 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
5952 wants to fix the assemblers because that causes incompatibility
5953 with gcc. No-one wants to fix gcc because that causes
5954 incompatibility with assemblers... You can use the option of
5955 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
5956 #define SYSV386_COMPAT 1
5957 #endif
5959 const char *
5960 output_387_binary_op (insn, operands)
5961 rtx insn;
5962 rtx *operands;
5964 static char buf[30];
5965 const char *p;
5966 const char *ssep;
5967 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
5969 #ifdef ENABLE_CHECKING
5970 /* Even if we do not want to check the inputs, this documents the input
5971 constraints, which helps in understanding the following code. */
5972 if (STACK_REG_P (operands[0])
5973 && ((REG_P (operands[1])
5974 && REGNO (operands[0]) == REGNO (operands[1])
5975 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
5976 || (REG_P (operands[2])
5977 && REGNO (operands[0]) == REGNO (operands[2])
5978 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
5979 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
5980 ; /* ok */
5981 else if (!is_sse)
5982 abort ();
5983 #endif
5985 switch (GET_CODE (operands[3]))
5987 case PLUS:
5988 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
5989 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
5990 p = "fiadd";
5991 else
5992 p = "fadd";
5993 ssep = "add";
5994 break;
5996 case MINUS:
5997 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
5998 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
5999 p = "fisub";
6000 else
6001 p = "fsub";
6002 ssep = "sub";
6003 break;
6005 case MULT:
6006 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6007 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6008 p = "fimul";
6009 else
6010 p = "fmul";
6011 ssep = "mul";
6012 break;
6014 case DIV:
6015 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6016 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6017 p = "fidiv";
6018 else
6019 p = "fdiv";
6020 ssep = "div";
6021 break;
6023 default:
6024 abort ();
6027 if (is_sse)
6029 strcpy (buf, ssep);
6030 if (GET_MODE (operands[0]) == SFmode)
6031 strcat (buf, "ss\t{%2, %0|%0, %2}");
6032 else
6033 strcat (buf, "sd\t{%2, %0|%0, %2}");
6034 return buf;
6036 strcpy (buf, p);
6038 switch (GET_CODE (operands[3]))
6040 case MULT:
6041 case PLUS:
6042 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6044 rtx temp = operands[2];
6045 operands[2] = operands[1];
6046 operands[1] = temp;
6049 /* We now know operands[0] == operands[1]. */
6051 if (GET_CODE (operands[2]) == MEM)
6053 p = "%z2\t%2";
6054 break;
6057 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6059 if (STACK_TOP_P (operands[0]))
6060 /* How is it that we are storing to a dead operand[2]?
6061 Well, presumably operands[1] is dead too. We can't
6062 store the result to st(0) as st(0) gets popped on this
6063 instruction. Instead store to operands[2] (which I
6064 think has to be st(1)). st(1) will be popped later.
6065 gcc <= 2.8.1 didn't have this check and generated
6066 assembly code that the Unixware assembler rejected. */
6067 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6068 else
6069 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6070 break;
6073 if (STACK_TOP_P (operands[0]))
6074 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6075 else
6076 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6077 break;
6079 case MINUS:
6080 case DIV:
6081 if (GET_CODE (operands[1]) == MEM)
6083 p = "r%z1\t%1";
6084 break;
6087 if (GET_CODE (operands[2]) == MEM)
6089 p = "%z2\t%2";
6090 break;
6093 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6095 #if SYSV386_COMPAT
6096 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6097 derived assemblers, confusingly reverse the direction of
6098 the operation for fsub{r} and fdiv{r} when the
6099 destination register is not st(0). The Intel assembler
6100 doesn't have this brain damage. Read !SYSV386_COMPAT to
6101 figure out what the hardware really does. */
6102 if (STACK_TOP_P (operands[0]))
6103 p = "{p\t%0, %2|rp\t%2, %0}";
6104 else
6105 p = "{rp\t%2, %0|p\t%0, %2}";
6106 #else
6107 if (STACK_TOP_P (operands[0]))
6108 /* As above for fmul/fadd, we can't store to st(0). */
6109 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6110 else
6111 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6112 #endif
6113 break;
6116 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6118 #if SYSV386_COMPAT
6119 if (STACK_TOP_P (operands[0]))
6120 p = "{rp\t%0, %1|p\t%1, %0}";
6121 else
6122 p = "{p\t%1, %0|rp\t%0, %1}";
6123 #else
6124 if (STACK_TOP_P (operands[0]))
6125 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6126 else
6127 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6128 #endif
6129 break;
6132 if (STACK_TOP_P (operands[0]))
6134 if (STACK_TOP_P (operands[1]))
6135 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6136 else
6137 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
6138 break;
6140 else if (STACK_TOP_P (operands[1]))
6142 #if SYSV386_COMPAT
6143 p = "{\t%1, %0|r\t%0, %1}";
6144 #else
6145 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
6146 #endif
6148 else
6150 #if SYSV386_COMPAT
6151 p = "{r\t%2, %0|\t%0, %2}";
6152 #else
6153 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6154 #endif
6156 break;
6158 default:
6159 abort ();
6162 strcat (buf, p);
6163 return buf;
6166 /* Output code to initialize the control word copies used by the
6167 trunc?f?i patterns. NORMAL is set to the current control word, while
6168 ROUND_DOWN is set to a copy that rounds toward zero (truncation). */
6169 void
6170 emit_i387_cw_initialization (normal, round_down)
6171 rtx normal, round_down;
6173 rtx reg = gen_reg_rtx (HImode);
6175 emit_insn (gen_x86_fnstcw_1 (normal));
6176 emit_move_insn (reg, normal);
6177 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6178 && !TARGET_64BIT)
6179 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6180 else
6181 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
6182 emit_move_insn (round_down, reg);
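/* Roughly, the sequence emitted above comes out as follows (an
   illustrative sketch of the resulting assembly, not literal output):

     fnstcw  NORMAL          ; save the current control word
     mov     NORMAL, %reg
     or      $0xc00, %reg    ; force rounding control to truncation
     mov     %reg, ROUND_DOWN  */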
6185 /* Output code for INSN to convert a float to a signed int. OPERANDS
6186 are the insn operands. The output may be [HSD]Imode and the input
6187 operand may be [SDX]Fmode. */
6189 const char *
6190 output_fix_trunc (insn, operands)
6191 rtx insn;
6192 rtx *operands;
6194 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6195 int dimode_p = GET_MODE (operands[0]) == DImode;
6197 /* Jump through a hoop or two for DImode, since the hardware has no
6198 non-popping instruction. We used to do this a different way, but
6199 that was somewhat fragile and broke with post-reload splitters. */
6200 if (dimode_p && !stack_top_dies)
6201 output_asm_insn ("fld\t%y1", operands);
6203 if (!STACK_TOP_P (operands[1]))
6204 abort ();
6206 if (GET_CODE (operands[0]) != MEM)
6207 abort ();
6209 output_asm_insn ("fldcw\t%3", operands);
6210 if (stack_top_dies || dimode_p)
6211 output_asm_insn ("fistp%z0\t%0", operands);
6212 else
6213 output_asm_insn ("fist%z0\t%0", operands);
6214 output_asm_insn ("fldcw\t%2", operands);
6216 return "";
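/* For example, truncating an SImode value whose x87 source dies, the
   code above emits roughly (illustrative):

     fldcw   %3      ; switch to the truncating control word
     fistpl  %0      ; convert to integer and pop
     fldcw   %2      ; restore the original control word  */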
6219 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6220 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6221 when fucom should be used. */
6223 const char *
6224 output_fp_compare (insn, operands, eflags_p, unordered_p)
6225 rtx insn;
6226 rtx *operands;
6227 int eflags_p, unordered_p;
6229 int stack_top_dies;
6230 rtx cmp_op0 = operands[0];
6231 rtx cmp_op1 = operands[1];
6232 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
6234 if (eflags_p == 2)
6236 cmp_op0 = cmp_op1;
6237 cmp_op1 = operands[2];
6239 if (is_sse)
6241 if (GET_MODE (operands[0]) == SFmode)
6242 if (unordered_p)
6243 return "ucomiss\t{%1, %0|%0, %1}";
6244 else
6245 return "comiss\t{%1, %0|%0, %y}";
6246 else
6247 if (unordered_p)
6248 return "ucomisd\t{%1, %0|%0, %1}";
6249 else
6250 return "comisd\t{%1, %0|%0, %y}";
6253 if (! STACK_TOP_P (cmp_op0))
6254 abort ();
6256 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6258 if (STACK_REG_P (cmp_op1)
6259 && stack_top_dies
6260 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6261 && REGNO (cmp_op1) != FIRST_STACK_REG)
6263 /* If the top of the 387 stack dies, and the other operand is
6264 also a stack register that dies, then this must be a
6265 `fcompp' float compare. */
6267 if (eflags_p == 1)
6269 /* There is no double popping fcomi variant. Fortunately,
6270 eflags is immune from the fstp's cc clobbering. */
6271 if (unordered_p)
6272 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6273 else
6274 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6275 return "fstp\t%y0";
6277 else
6279 if (eflags_p == 2)
6281 if (unordered_p)
6282 return "fucompp\n\tfnstsw\t%0";
6283 else
6284 return "fcompp\n\tfnstsw\t%0";
6286 else
6288 if (unordered_p)
6289 return "fucompp";
6290 else
6291 return "fcompp";
6295 else
6297 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
6299 static const char * const alt[24] =
6301 "fcom%z1\t%y1",
6302 "fcomp%z1\t%y1",
6303 "fucom%z1\t%y1",
6304 "fucomp%z1\t%y1",
6306 "ficom%z1\t%y1",
6307 "ficomp%z1\t%y1",
6308 NULL,
6309 NULL,
6311 "fcomi\t{%y1, %0|%0, %y1}",
6312 "fcomip\t{%y1, %0|%0, %y1}",
6313 "fucomi\t{%y1, %0|%0, %y1}",
6314 "fucomip\t{%y1, %0|%0, %y1}",
6316 NULL,
6317 NULL,
6318 NULL,
6319 NULL,
6321 "fcom%z2\t%y2\n\tfnstsw\t%0",
6322 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6323 "fucom%z2\t%y2\n\tfnstsw\t%0",
6324 "fucomp%z2\t%y2\n\tfnstsw\t%0",
6326 "ficom%z2\t%y2\n\tfnstsw\t%0",
6327 "ficomp%z2\t%y2\n\tfnstsw\t%0",
6328 NULL,
6329 NULL
6332 int mask;
6333 const char *ret;
6335 mask = eflags_p << 3;
6336 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6337 mask |= unordered_p << 1;
6338 mask |= stack_top_dies;
6340 if (mask >= 24)
6341 abort ();
6342 ret = alt[mask];
6343 if (ret == NULL)
6344 abort ();
6346 return ret;
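/* Worked example of the mask encoding above (illustrative): a popping
   unordered FP compare through fnstsw has eflags_p == 2, a non-integer
   operand, unordered_p == 1 and stack_top_dies == 1, so
   mask = (2 << 3) | (0 << 2) | (1 << 1) | 1 = 19, selecting
   "fucomp%z2\t%y2\n\tfnstsw\t%0".  */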
6350 /* Output assembler code to FILE to initialize basic-block profiling.
6352 If profile_block_flag == 2
6354 Output code to call the subroutine `__bb_init_trace_func'
6355 and pass two parameters to it. The first parameter is
6356 the address of a block allocated in the object module.
6357 The second parameter is the number of the first basic block
6358 of the function.
6360 The name of the block is a local symbol made with this statement:
6362 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
6364 Of course, since you are writing the definition of
6365 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
6366 can take a short cut in the definition of this macro and use the
6367 name that you know will result.
6369 The number of the first basic block of the function is
6370 passed to the macro in BLOCK_OR_LABEL.
6372 If described in a virtual assembler language the code to be
6373 output looks like:
6375 parameter1 <- LPBX0
6376 parameter2 <- BLOCK_OR_LABEL
6377 call __bb_init_trace_func
6379 else if profile_block_flag != 0
6381 Output code to call the subroutine `__bb_init_func'
6382 and pass one single parameter to it, which is the same
6383 as the first parameter to `__bb_init_trace_func'.
6385 The first word of this parameter is a flag which will be nonzero if
6386 the object module has already been initialized. So test this word
6387 first, and do not call `__bb_init_func' if the flag is nonzero.
6388 Note: When profile_block_flag == 2 the test need not be done
6389 but `__bb_init_trace_func' *must* be called.
6391 BLOCK_OR_LABEL may be used to generate a label number as a
6392 branch destination in case `__bb_init_func' will not be called.
6394 If described in a virtual assembler language the code to be
6395 output looks like:
6397 cmp (LPBX0),0
6398 jne local_label
6399 parameter1 <- LPBX0
6400 call __bb_init_func
6401 local_label:
6404 void
6405 ix86_output_function_block_profiler (file, block_or_label)
6406 FILE *file;
6407 int block_or_label;
6409 static int num_func = 0;
6410 rtx xops[8];
6411 char block_table[80], false_label[80];
6413 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
6415 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
6416 xops[5] = stack_pointer_rtx;
6417 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
6419 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
6421 switch (profile_block_flag)
6423 case 2:
6424 xops[2] = GEN_INT (block_or_label);
6425 xops[3] = gen_rtx_MEM (Pmode,
6426 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
6427 xops[6] = GEN_INT (8);
6429 output_asm_insn ("push{l}\t%2", xops);
6430 if (!flag_pic)
6431 output_asm_insn ("push{l}\t%1", xops);
6432 else
6434 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
6435 output_asm_insn ("push{l}\t%7", xops);
6437 output_asm_insn ("call\t%P3", xops);
6438 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
6439 break;
6441 default:
6442 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
6444 xops[0] = const0_rtx;
6445 xops[2] = gen_rtx_MEM (Pmode,
6446 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
6447 xops[3] = gen_rtx_MEM (Pmode,
6448 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
6449 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
6450 xops[6] = GEN_INT (4);
6452 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
6454 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
6455 output_asm_insn ("jne\t%2", xops);
6457 if (!flag_pic)
6458 output_asm_insn ("push{l}\t%1", xops);
6459 else
6461 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
6462 output_asm_insn ("push{l}\t%7", xops);
6464 output_asm_insn ("call\t%P3", xops);
6465 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
6466 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
6467 num_func++;
6468 break;
6472 /* Output assembler code to FILE to increment a counter associated
6473 with basic block number BLOCKNO.
6475 If profile_block_flag == 2
6477 Output code to initialize the global structure `__bb' and
6478 call the function `__bb_trace_func' which will increment the
6479 counter.
6481 `__bb' consists of two words. In the first word the number
6482 of the basic block has to be stored. In the second word
6483 the address of a block allocated in the object module
6484 has to be stored.
6486 The basic block number is given by BLOCKNO.
6488 The address of the block is given by the label created with
6490 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
6492 by FUNCTION_BLOCK_PROFILER.
6494 Of course, since you are writing the definition of
6495 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
6496 can take a short cut in the definition of this macro and use the
6497 name that you know will result.
6499 If described in a virtual assembler language the code to be
6500 output looks like:
6502 move BLOCKNO -> (__bb)
6503 move LPBX0 -> (__bb+4)
6504 call __bb_trace_func
6506 Note that function `__bb_trace_func' must not change the
6507 machine state, especially the flag register. To grant
6508 this, you must output code to save and restore registers
6509 either in this macro or in the macros MACHINE_STATE_SAVE
6510 and MACHINE_STATE_RESTORE. The last two macros will be
6511 used in the function `__bb_trace_func', so you must make
6512 sure that the function prologue does not change any
6513 register prior to saving it with MACHINE_STATE_SAVE.
6515 else if profile_block_flag != 0
6517 Output code to increment the counter directly.
6518 Basic blocks are numbered separately from zero within each
6519 compiled object module. The count associated with block number
6520 BLOCKNO is at index BLOCKNO in an array of words; the name of
6521 this array is a local symbol made with this statement:
6523 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
6525 Of course, since you are writing the definition of
6526 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
6527 can take a short cut in the definition of this macro and use the
6528 name that you know will result.
6530 If described in a virtual assembler language the code to be
6531 output looks like:
6533 inc (LPBX2+4*BLOCKNO)
6536 void
6537 ix86_output_block_profiler (file, blockno)
6538 FILE *file ATTRIBUTE_UNUSED;
6539 int blockno;
6541 rtx xops[8], cnt_rtx;
6542 char counts[80];
6543 char *block_table = counts;
6545 switch (profile_block_flag)
6547 case 2:
6548 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
6550 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
6551 xops[2] = GEN_INT (blockno);
6552 xops[3] = gen_rtx_MEM (Pmode,
6553 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
6554 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
6555 xops[5] = plus_constant (xops[4], 4);
6556 xops[0] = gen_rtx_MEM (SImode, xops[4]);
6557 xops[6] = gen_rtx_MEM (SImode, xops[5]);
6559 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
6561 output_asm_insn ("pushf", xops);
6562 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6563 if (flag_pic)
6565 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
6566 output_asm_insn ("push{l}\t%7", xops);
6567 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
6568 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
6569 output_asm_insn ("pop{l}\t%7", xops);
6571 else
6572 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
6573 output_asm_insn ("call\t%P3", xops);
6574 output_asm_insn ("popf", xops);
6576 break;
6578 default:
6579 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
6580 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
6581 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
6583 if (blockno)
6584 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
6586 if (flag_pic)
6587 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
6589 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
6590 output_asm_insn ("inc{l}\t%0", xops);
6592 break;
6596 void
6597 ix86_expand_move (mode, operands)
6598 enum machine_mode mode;
6599 rtx operands[];
6601 int strict = (reload_in_progress || reload_completed);
6602 rtx insn;
6604 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
6606 /* Emit insns to move operands[1] into operands[0]. */
6608 if (GET_CODE (operands[0]) == MEM)
6609 operands[1] = force_reg (Pmode, operands[1]);
6610 else
6612 rtx temp = operands[0];
6613 if (GET_CODE (temp) != REG)
6614 temp = gen_reg_rtx (Pmode);
6615 temp = legitimize_pic_address (operands[1], temp);
6616 if (temp == operands[0])
6617 return;
6618 operands[1] = temp;
6621 else
6623 if (GET_CODE (operands[0]) == MEM
6624 && (GET_MODE (operands[0]) == QImode
6625 || !push_operand (operands[0], mode))
6626 && GET_CODE (operands[1]) == MEM)
6627 operands[1] = force_reg (mode, operands[1]);
6629 if (push_operand (operands[0], mode)
6630 && ! general_no_elim_operand (operands[1], mode))
6631 operands[1] = copy_to_mode_reg (mode, operands[1]);
6633 if (FLOAT_MODE_P (mode))
6635 /* If we are loading a floating point constant to a register,
6636 force the value to memory now, since we'll get better code
6637 out the back end. */
6639 if (strict)
6641 else if (GET_CODE (operands[1]) == CONST_DOUBLE
6642 && register_operand (operands[0], mode))
6643 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
6647 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
6649 emit_insn (insn);
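/* A movMM expander in the machine description would typically call the
   function above as follows (hypothetical pattern body):

     ix86_expand_move (SImode, operands);
     DONE;  */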
6652 /* Attempt to expand a binary operator. Make the expansion closer to the
6653 actual machine than just general_operand, which would allow 3 separate
6654 memory references (one output, two inputs) in a single insn. */
6656 void
6657 ix86_expand_binary_operator (code, mode, operands)
6658 enum rtx_code code;
6659 enum machine_mode mode;
6660 rtx operands[];
6662 int matching_memory;
6663 rtx src1, src2, dst, op, clob;
6665 dst = operands[0];
6666 src1 = operands[1];
6667 src2 = operands[2];
6669 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6670 if (GET_RTX_CLASS (code) == 'c'
6671 && (rtx_equal_p (dst, src2)
6672 || immediate_operand (src1, mode)))
6674 rtx temp = src1;
6675 src1 = src2;
6676 src2 = temp;
6679 /* If the destination is memory, and we do not have matching source
6680 operands, do things in registers. */
6681 matching_memory = 0;
6682 if (GET_CODE (dst) == MEM)
6684 if (rtx_equal_p (dst, src1))
6685 matching_memory = 1;
6686 else if (GET_RTX_CLASS (code) == 'c'
6687 && rtx_equal_p (dst, src2))
6688 matching_memory = 2;
6689 else
6690 dst = gen_reg_rtx (mode);
6693 /* Both source operands cannot be in memory. */
6694 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
6696 if (matching_memory != 2)
6697 src2 = force_reg (mode, src2);
6698 else
6699 src1 = force_reg (mode, src1);
6702 /* If the operation is not commutative, source 1 cannot be a constant
6703 or non-matching memory. */
6704 if ((CONSTANT_P (src1)
6705 || (!matching_memory && GET_CODE (src1) == MEM))
6706 && GET_RTX_CLASS (code) != 'c')
6707 src1 = force_reg (mode, src1);
6709 /* If optimizing, copy to regs to improve CSE */
6710 if (optimize && ! no_new_pseudos)
6712 if (GET_CODE (dst) == MEM)
6713 dst = gen_reg_rtx (mode);
6714 if (GET_CODE (src1) == MEM)
6715 src1 = force_reg (mode, src1);
6716 if (GET_CODE (src2) == MEM)
6717 src2 = force_reg (mode, src2);
6720 /* Emit the instruction. */
6722 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
6723 if (reload_in_progress)
6725 /* Reload doesn't know about the flags register, and doesn't know that
6726 it doesn't want to clobber it. We can only do this with PLUS. */
6727 if (code != PLUS)
6728 abort ();
6729 emit_insn (op);
6731 else
6733 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6734 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6737 /* Fix up the destination if needed. */
6738 if (dst != operands[0])
6739 emit_move_insn (operands[0], dst);
6742 /* Return TRUE or FALSE depending on whether the binary operator meets the
6743 appropriate constraints. */
6746 ix86_binary_operator_ok (code, mode, operands)
6747 enum rtx_code code;
6748 enum machine_mode mode ATTRIBUTE_UNUSED;
6749 rtx operands[3];
6751 /* Both source operands cannot be in memory. */
6752 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
6753 return 0;
6754 /* If the operation is not commutative, source 1 cannot be a constant. */
6755 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
6756 return 0;
6757 /* If the destination is memory, we must have a matching source operand. */
6758 if (GET_CODE (operands[0]) == MEM
6759 && ! (rtx_equal_p (operands[0], operands[1])
6760 || (GET_RTX_CLASS (code) == 'c'
6761 && rtx_equal_p (operands[0], operands[2]))))
6762 return 0;
6763 /* If the operation is not commutative and source 1 is memory, we must
6764 have a matching destination. */
6765 if (GET_CODE (operands[1]) == MEM
6766 && GET_RTX_CLASS (code) != 'c'
6767 && ! rtx_equal_p (operands[0], operands[1]))
6768 return 0;
6769 return 1;
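/* Illustrative examples of the checks above:
     (set (reg) (plus (reg) (mem)))        -- accepted
     (set (reg) (minus (mem) (mem)))       -- rejected: two memory sources
     (set (mem A) (minus (reg) (mem B)))   -- rejected: the memory
                                              destination matches neither
                                              source of a non-commutative op  */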
6772 /* Attempt to expand a unary operator. Make the expansion closer to the
6773 actual machine than just general_operand, which would allow 2 separate
6774 memory references (one output, one input) in a single insn. */
6776 void
6777 ix86_expand_unary_operator (code, mode, operands)
6778 enum rtx_code code;
6779 enum machine_mode mode;
6780 rtx operands[];
6782 int matching_memory;
6783 rtx src, dst, op, clob;
6785 dst = operands[0];
6786 src = operands[1];
6788 /* If the destination is memory, and we do not have matching source
6789 operands, do things in registers. */
6790 matching_memory = 0;
6791 if (GET_CODE (dst) == MEM)
6793 if (rtx_equal_p (dst, src))
6794 matching_memory = 1;
6795 else
6796 dst = gen_reg_rtx (mode);
6799 /* When source operand is memory, destination must match. */
6800 if (!matching_memory && GET_CODE (src) == MEM)
6801 src = force_reg (mode, src);
6803 /* If optimizing, copy to regs to improve CSE */
6804 if (optimize && ! no_new_pseudos)
6806 if (GET_CODE (dst) == MEM)
6807 dst = gen_reg_rtx (mode);
6808 if (GET_CODE (src) == MEM)
6809 src = force_reg (mode, src);
6812 /* Emit the instruction. */
6814 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
6815 if (reload_in_progress || code == NOT)
6817 /* Reload doesn't know about the flags register, and doesn't know that
6818 it doesn't want to clobber it. */
6819 if (code != NOT)
6820 abort ();
6821 emit_insn (op);
6823 else
6825 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6826 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6829 /* Fix up the destination if needed. */
6830 if (dst != operands[0])
6831 emit_move_insn (operands[0], dst);
6834 /* Return TRUE or FALSE depending on whether the unary operator meets the
6835 appropriate constraints. */
6838 ix86_unary_operator_ok (code, mode, operands)
6839 enum rtx_code code ATTRIBUTE_UNUSED;
6840 enum machine_mode mode ATTRIBUTE_UNUSED;
6841 rtx operands[2] ATTRIBUTE_UNUSED;
6843 /* If one of operands is memory, source and destination must match. */
6844 if ((GET_CODE (operands[0]) == MEM
6845 || GET_CODE (operands[1]) == MEM)
6846 && ! rtx_equal_p (operands[0], operands[1]))
6847 return FALSE;
6848 return TRUE;
6851 /* Return TRUE or FALSE depending on whether the first SET in INSN
6852 has source and destination with matching CC modes, and that the
6853 CC mode is at least as constrained as REQ_MODE. */
6856 ix86_match_ccmode (insn, req_mode)
6857 rtx insn;
6858 enum machine_mode req_mode;
6860 rtx set;
6861 enum machine_mode set_mode;
6863 set = PATTERN (insn);
6864 if (GET_CODE (set) == PARALLEL)
6865 set = XVECEXP (set, 0, 0);
6866 if (GET_CODE (set) != SET)
6867 abort ();
6868 if (GET_CODE (SET_SRC (set)) != COMPARE)
6869 abort ();
6871 set_mode = GET_MODE (SET_DEST (set));
6872 switch (set_mode)
6874 case CCNOmode:
6875 if (req_mode != CCNOmode
6876 && (req_mode != CCmode
6877 || XEXP (SET_SRC (set), 1) != const0_rtx))
6878 return 0;
6879 break;
6880 case CCmode:
6881 if (req_mode == CCGCmode)
6882 return 0;
6883 /* FALLTHRU */
6884 case CCGCmode:
6885 if (req_mode == CCGOCmode || req_mode == CCNOmode)
6886 return 0;
6887 /* FALLTHRU */
6888 case CCGOCmode:
6889 if (req_mode == CCZmode)
6890 return 0;
6891 /* FALLTHRU */
6892 case CCZmode:
6893 break;
6895 default:
6896 abort ();
6899 return (GET_MODE (SET_SRC (set)) == set_mode);
6902 /* Generate insn patterns to do an integer compare of OPERANDS. */
6904 static rtx
6905 ix86_expand_int_compare (code, op0, op1)
6906 enum rtx_code code;
6907 rtx op0, op1;
6909 enum machine_mode cmpmode;
6910 rtx tmp, flags;
6912 cmpmode = SELECT_CC_MODE (code, op0, op1);
6913 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
6915 /* This is very simple, but making the interface the same as in the
6916 FP case makes the rest of the code easier. */
6917 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
6918 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
6920 /* Return the test that should be put into the flags user, i.e.
6921 the bcc, scc, or cmov instruction. */
6922 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
6925 /* Figure out whether to use ordered or unordered fp comparisons.
6926 Return the appropriate mode to use. */
6928 enum machine_mode
6929 ix86_fp_compare_mode (code)
6930 enum rtx_code code ATTRIBUTE_UNUSED;
6932 /* ??? In order to make all comparisons reversible, we do all comparisons
6933 non-trapping when compiling for IEEE. Once gcc is able to distinguish
6934 between the trapping and nontrapping forms of all comparisons, we can
6935 make inequality comparisons trapping again, since that results in better
6936 code when using FCOM based compares. */
6937 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
6940 enum machine_mode
6941 ix86_cc_mode (code, op0, op1)
6942 enum rtx_code code;
6943 rtx op0, op1;
6945 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
6946 return ix86_fp_compare_mode (code);
6947 switch (code)
6949 /* Only zero flag is needed. */
6950 case EQ: /* ZF=0 */
6951 case NE: /* ZF!=0 */
6952 return CCZmode;
6953 /* Codes needing carry flag. */
6954 case GEU: /* CF=0 */
6955 case GTU: /* CF=0 & ZF=0 */
6956 case LTU: /* CF=1 */
6957 case LEU: /* CF=1 | ZF=1 */
6958 return CCmode;
6959 /* Codes possibly doable only with sign flag when
6960 comparing against zero. */
6961 case GE: /* SF=OF or SF=0 */
6962 case LT: /* SF<>OF or SF=1 */
6963 if (op1 == const0_rtx)
6964 return CCGOCmode;
6965 else
6966 /* For other cases Carry flag is not required. */
6967 return CCGCmode;
6968 /* Codes doable only with the sign flag when comparing
6969 against zero, but for which we have no jump instruction,
6970 so we need to use relational tests against overflow,
6971 which thus needs to be zero. */
6972 case GT: /* ZF=0 & SF=OF */
6973 case LE: /* ZF=1 | SF<>OF */
6974 if (op1 == const0_rtx)
6975 return CCNOmode;
6976 else
6977 return CCGCmode;
6978 default:
6979 abort ();
6983 /* Return true if we should use an FCOMI instruction for this fp comparison. */
6986 ix86_use_fcomi_compare (code)
6987 enum rtx_code code ATTRIBUTE_UNUSED;
6989 enum rtx_code swapped_code = swap_condition (code);
6990 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
6991 || (ix86_fp_comparison_cost (swapped_code)
6992 == ix86_fp_comparison_fcomi_cost (swapped_code)));
6995 /* Swap, force into registers, or otherwise massage the two operands
6996 to a fp comparison. The operands are updated in place; the new
6997 comparison code is returned. */
6999 static enum rtx_code
7000 ix86_prepare_fp_compare_args (code, pop0, pop1)
7001 enum rtx_code code;
7002 rtx *pop0, *pop1;
7004 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7005 rtx op0 = *pop0, op1 = *pop1;
7006 enum machine_mode op_mode = GET_MODE (op0);
7007 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7009 /* All of the unordered compare instructions only work on registers.
7010 The same is true of the XFmode compare instructions. The same is
7011 true of the fcomi compare instructions. */
7013 if (!is_sse
7014 && (fpcmp_mode == CCFPUmode
7015 || op_mode == XFmode
7016 || op_mode == TFmode
7017 || ix86_use_fcomi_compare (code)))
7019 op0 = force_reg (op_mode, op0);
7020 op1 = force_reg (op_mode, op1);
7022 else
7024 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7025 things around if they appear profitable, otherwise force op0
7026 into a register. */
7028 if (standard_80387_constant_p (op0) == 0
7029 || (GET_CODE (op0) == MEM
7030 && ! (standard_80387_constant_p (op1) == 0
7031 || GET_CODE (op1) == MEM)))
7033 rtx tmp;
7034 tmp = op0, op0 = op1, op1 = tmp;
7035 code = swap_condition (code);
7038 if (GET_CODE (op0) != REG)
7039 op0 = force_reg (op_mode, op0);
7041 if (CONSTANT_P (op1))
7043 if (standard_80387_constant_p (op1))
7044 op1 = force_reg (op_mode, op1);
7045 else
7046 op1 = validize_mem (force_const_mem (op_mode, op1));
7050 /* Try to rearrange the comparison to make it cheaper. */
7051 if (ix86_fp_comparison_cost (code)
7052 > ix86_fp_comparison_cost (swap_condition (code))
7053 && (GET_CODE (op0) == REG || !reload_completed))
7055 rtx tmp;
7056 tmp = op0, op0 = op1, op1 = tmp;
7057 code = swap_condition (code);
7058 if (GET_CODE (op0) != REG)
7059 op0 = force_reg (op_mode, op0);
7062 *pop0 = op0;
7063 *pop1 = op1;
7064 return code;
7067 /* Convert comparison codes we use to represent FP comparison to integer
7068 code that will result in proper branch. Return UNKNOWN if no such code
7069 is available. */
7070 static enum rtx_code
7071 ix86_fp_compare_code_to_integer (code)
7072 enum rtx_code code;
7074 switch (code)
7076 case GT:
7077 return GTU;
7078 case GE:
7079 return GEU;
7080 case ORDERED:
7081 case UNORDERED:
7082 return code;
7083 break;
7084 case UNEQ:
7085 return EQ;
7086 break;
7087 case UNLT:
7088 return LTU;
7089 break;
7090 case UNLE:
7091 return LEU;
7092 break;
7093 case LTGT:
7094 return NE;
7095 break;
7096 default:
7097 return UNKNOWN;
7101 /* Split comparison code CODE into comparisons we can do using branch
7102 instructions. BYPASS_CODE is the comparison code for a branch that will
7103 branch around FIRST_CODE and SECOND_CODE. If one of the branches
7104 is not required, its code is set to NIL.
7105 We never require more than two branches. */
7106 static void
7107 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7108 enum rtx_code code, *bypass_code, *first_code, *second_code;
7110 *first_code = code;
7111 *bypass_code = NIL;
7112 *second_code = NIL;
7114 /* The fcomi comparison sets flags as follows:
7116 cmp ZF PF CF
7117 > 0 0 0
7118 < 0 0 1
7119 = 1 0 0
7120 un 1 1 1 */
7122 switch (code)
7124 case GT: /* GTU - CF=0 & ZF=0 */
7125 case GE: /* GEU - CF=0 */
7126 case ORDERED: /* PF=0 */
7127 case UNORDERED: /* PF=1 */
7128 case UNEQ: /* EQ - ZF=1 */
7129 case UNLT: /* LTU - CF=1 */
7130 case UNLE: /* LEU - CF=1 | ZF=1 */
7131 case LTGT: /* EQ - ZF=0 */
7132 break;
7133 case LT: /* LTU - CF=1 - fails on unordered */
7134 *first_code = UNLT;
7135 *bypass_code = UNORDERED;
7136 break;
7137 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7138 *first_code = UNLE;
7139 *bypass_code = UNORDERED;
7140 break;
7141 case EQ: /* EQ - ZF=1 - fails on unordered */
7142 *first_code = UNEQ;
7143 *bypass_code = UNORDERED;
7144 break;
7145 case NE: /* NE - ZF=0 - fails on unordered */
7146 *first_code = LTGT;
7147 *second_code = UNORDERED;
7148 break;
7149 case UNGE: /* GEU - CF=0 - fails on unordered */
7150 *first_code = GE;
7151 *second_code = UNORDERED;
7152 break;
7153 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7154 *first_code = GT;
7155 *second_code = UNORDERED;
7156 break;
7157 default:
7158 abort ();
7160 if (!TARGET_IEEE_FP)
7162 *second_code = NIL;
7163 *bypass_code = NIL;
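/* Worked example of the splitting above (illustrative): with
   TARGET_IEEE_FP, code == EQ yields first_code = UNEQ with
   bypass_code = UNORDERED, i.e. branch around the equality test when
   the operands are unordered -- fcomi sets ZF for unordered operands,
   so testing ZF alone would treat NaN operands as equal.  */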
7167 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
7168 All of the following functions use the number of instructions as the cost metric.
7169 In the future this should be tweaked to compute bytes for optimize_size and
7170 take into account the performance of various instructions on various CPUs. */
7171 static int
7172 ix86_fp_comparison_arithmetics_cost (code)
7173 enum rtx_code code;
7175 if (!TARGET_IEEE_FP)
7176 return 4;
7177 /* The cost of code output by ix86_expand_fp_compare. */
7178 switch (code)
7180 case UNLE:
7181 case UNLT:
7182 case LTGT:
7183 case GT:
7184 case GE:
7185 case UNORDERED:
7186 case ORDERED:
7187 case UNEQ:
7188 return 4;
7189 break;
7190 case LT:
7191 case NE:
7192 case EQ:
7193 case UNGE:
7194 return 5;
7195 break;
7196 case LE:
7197 case UNGT:
7198 return 6;
7199 break;
7200 default:
7201 abort ();
7205 /* Return cost of comparison done using fcomi operation.
7206 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7207 static int
7208 ix86_fp_comparison_fcomi_cost (code)
7209 enum rtx_code code;
7211 enum rtx_code bypass_code, first_code, second_code;
7212 /* Return an arbitrarily high cost when the instruction is not supported - this
7213 prevents gcc from using it. */
7214 if (!TARGET_CMOVE)
7215 return 1024;
7216 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7217 return (bypass_code != NIL || second_code != NIL) + 2;
7220 /* Return cost of comparison done using sahf operation.
7221 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7222 static int
7223 ix86_fp_comparison_sahf_cost (code)
7224 enum rtx_code code;
7226 enum rtx_code bypass_code, first_code, second_code;
7227 /* Return an arbitrarily high cost when the instruction is not preferred - this
7228 prevents gcc from using it. */
7229 if (!TARGET_USE_SAHF && !optimize_size)
7230 return 1024;
7231 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7232 return (bypass_code != NIL || second_code != NIL) + 3;
7235 /* Compute cost of the comparison done using any method.
7236 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7237 static int
7238 ix86_fp_comparison_cost (code)
7239 enum rtx_code code;
7241 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7242 int min;
7244 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7245 sahf_cost = ix86_fp_comparison_sahf_cost (code);
7247 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7248 if (min > sahf_cost)
7249 min = sahf_cost;
7250 if (min > fcomi_cost)
7251 min = fcomi_cost;
7252 return min;
7255 /* Generate insn patterns to do a floating point compare of OPERANDS. */
7257 static rtx
7258 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
7259 enum rtx_code code;
7260 rtx op0, op1, scratch;
7261 rtx *second_test;
7262 rtx *bypass_test;
7264 enum machine_mode fpcmp_mode, intcmp_mode;
7265 rtx tmp, tmp2;
7266 int cost = ix86_fp_comparison_cost (code);
7267 enum rtx_code bypass_code, first_code, second_code;
7269 fpcmp_mode = ix86_fp_compare_mode (code);
7270 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7272 if (second_test)
7273 *second_test = NULL_RTX;
7274 if (bypass_test)
7275 *bypass_test = NULL_RTX;
7277 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7279 /* Do fcomi/sahf based test when profitable. */
7280 if ((bypass_code == NIL || bypass_test)
7281 && (second_code == NIL || second_test)
7282 && ix86_fp_comparison_arithmetics_cost (code) > cost)
7284 if (TARGET_CMOVE)
7286 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7287 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
7288 tmp);
7289 emit_insn (tmp);
7291 else
7293 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7294 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7295 if (!scratch)
7296 scratch = gen_reg_rtx (HImode);
7297 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7298 emit_insn (gen_x86_sahf_1 (scratch));
7301 /* The FP codes work out to act like unsigned. */
7302 intcmp_mode = fpcmp_mode;
7303 code = first_code;
7304 if (bypass_code != NIL)
7305 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7306 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7307 const0_rtx);
7308 if (second_code != NIL)
7309 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7310 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7311 const0_rtx);
7313 else
7315 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
7316 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7317 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7318 if (!scratch)
7319 scratch = gen_reg_rtx (HImode);
7320 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7322 /* In the unordered case, we have to check C2 for NaN's, which
7323 doesn't happen to work out to anything nice combination-wise.
7324 So do some bit twiddling on the value we've got in AH to come
7325 up with an appropriate set of condition codes. */
7327 intcmp_mode = CCNOmode;
7328 switch (code)
7330 case GT:
7331 case UNGT:
7332 if (code == GT || !TARGET_IEEE_FP)
7334 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7335 code = EQ;
7337 else
7339 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7340 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7341 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7342 intcmp_mode = CCmode;
7343 code = GEU;
7345 break;
7346 case LT:
7347 case UNLT:
7348 if (code == LT && TARGET_IEEE_FP)
7350 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7351 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
7352 intcmp_mode = CCmode;
7353 code = EQ;
7355 else
7357 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7358 code = NE;
7360 break;
7361 case GE:
7362 case UNGE:
7363 if (code == GE || !TARGET_IEEE_FP)
7365 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
7366 code = EQ;
7368 else
7370 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7371 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7372 GEN_INT (0x01)));
7373 code = NE;
7375 break;
7376 case LE:
7377 case UNLE:
7378 if (code == LE && TARGET_IEEE_FP)
7380 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7381 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7382 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7383 intcmp_mode = CCmode;
7384 code = LTU;
7386 else
7388 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7389 code = NE;
7391 break;
7392 case EQ:
7393 case UNEQ:
7394 if (code == EQ && TARGET_IEEE_FP)
7396 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7397 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7398 intcmp_mode = CCmode;
7399 code = EQ;
7401 else
7403 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7404 code = NE;
7405 break;
7407 break;
7408 case NE:
7409 case LTGT:
7410 if (code == NE && TARGET_IEEE_FP)
7412 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7413 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7414 GEN_INT (0x40)));
7415 code = NE;
7417 else
7419 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7420 code = EQ;
7422 break;
7424 case UNORDERED:
7425 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7426 code = NE;
7427 break;
7428 case ORDERED:
7429 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7430 code = EQ;
7431 break;
7433 default:
7434 abort ();
7438 /* Return the test that should be put into the flags user, i.e.
7439 the bcc, scc, or cmov instruction. */
7440 return gen_rtx_fmt_ee (code, VOIDmode,
7441 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7442 const0_rtx);
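/* Key to the magic constants in the fnstsw/sahf path above: after
   fnstsw, AH holds the x87 condition bits C0 = 0x01, C2 = 0x04 and
   C3 = 0x40, so 0x45 masks all three.  E.g. the ORDERED test
   "test $0x04, %ah" checks only C2, the "unordered" bit.  */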
7446 ix86_expand_compare (code, second_test, bypass_test)
7447 enum rtx_code code;
7448 rtx *second_test, *bypass_test;
7450 rtx op0, op1, ret;
7451 op0 = ix86_compare_op0;
7452 op1 = ix86_compare_op1;
7454 if (second_test)
7455 *second_test = NULL_RTX;
7456 if (bypass_test)
7457 *bypass_test = NULL_RTX;
7459 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7460 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
7461 second_test, bypass_test);
7462 else
7463 ret = ix86_expand_int_compare (code, op0, op1);
7465 return ret;
7468 /* Return true if the CODE will result in a nontrivial jump sequence. */
7469 bool
7470 ix86_fp_jump_nontrivial_p (code)
7471 enum rtx_code code;
7473 enum rtx_code bypass_code, first_code, second_code;
7474 if (!TARGET_CMOVE)
7475 return true;
7476 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7477 return bypass_code != NIL || second_code != NIL;
7480 void
7481 ix86_expand_branch (code, label)
7482 enum rtx_code code;
7483 rtx label;
7485 rtx tmp;
7487 switch (GET_MODE (ix86_compare_op0))
7489 case QImode:
7490 case HImode:
7491 case SImode:
7492 simple:
7493 tmp = ix86_expand_compare (code, NULL, NULL);
7494 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7495 gen_rtx_LABEL_REF (VOIDmode, label),
7496 pc_rtx);
7497 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7498 return;
7500 case SFmode:
7501 case DFmode:
7502 case XFmode:
7503 case TFmode:
7505 rtvec vec;
7506 int use_fcomi;
7507 enum rtx_code bypass_code, first_code, second_code;
7509 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7510 &ix86_compare_op1);
7512 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7514 /* Check whether we will use the natural sequence with one jump. If
7515 so, we can expand the jump early. Otherwise delay expansion by
7516 creating a compound insn so as not to confuse the optimizers. */
7517 if (bypass_code == NIL && second_code == NIL
7518 && TARGET_CMOVE)
7520 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7521 gen_rtx_LABEL_REF (VOIDmode, label),
7522 pc_rtx, NULL_RTX);
7524 else
7526 tmp = gen_rtx_fmt_ee (code, VOIDmode,
7527 ix86_compare_op0, ix86_compare_op1);
7528 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7529 gen_rtx_LABEL_REF (VOIDmode, label),
7530 pc_rtx);
7531 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7533 use_fcomi = ix86_use_fcomi_compare (code);
7534 vec = rtvec_alloc (3 + !use_fcomi);
7535 RTVEC_ELT (vec, 0) = tmp;
7536 RTVEC_ELT (vec, 1)
7537 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7538 RTVEC_ELT (vec, 2)
7539 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7540 if (! use_fcomi)
7541 RTVEC_ELT (vec, 3)
7542 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7544 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7546 return;
7549 case DImode:
7550 if (TARGET_64BIT)
7551 goto simple;
7552 /* Expand DImode branch into multiple compare+branch. */
7554 rtx lo[2], hi[2], label2;
7555 enum rtx_code code1, code2, code3;
7557 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7559 tmp = ix86_compare_op0;
7560 ix86_compare_op0 = ix86_compare_op1;
7561 ix86_compare_op1 = tmp;
7562 code = swap_condition (code);
7564 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7565 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
7567 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7568 avoid two branches. This costs one extra insn, so disable when
7569 optimizing for size. */
7571 if ((code == EQ || code == NE)
7572 && (!optimize_size
7573 || hi[1] == const0_rtx || lo[1] == const0_rtx))
7575 rtx xor0, xor1;
7577 xor1 = hi[0];
7578 if (hi[1] != const0_rtx)
7579 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7580 NULL_RTX, 0, OPTAB_WIDEN);
7582 xor0 = lo[0];
7583 if (lo[1] != const0_rtx)
7584 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7585 NULL_RTX, 0, OPTAB_WIDEN);
7587 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7588 NULL_RTX, 0, OPTAB_WIDEN);
7590 ix86_compare_op0 = tmp;
7591 ix86_compare_op1 = const0_rtx;
7592 ix86_expand_branch (code, label);
7593 return;
7596 /* Otherwise, if we are doing a less-than or greater-or-equal-than
7597 comparison, op1 is a constant, and the low word is zero, then we
7598 can just examine the high word. */
7600 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7601 switch (code)
7603 case LT: case LTU: case GE: case GEU:
7604 ix86_compare_op0 = hi[0];
7605 ix86_compare_op1 = hi[1];
7606 ix86_expand_branch (code, label);
7607 return;
7608 default:
7609 break;
7612 /* Otherwise, we need two or three jumps. */
7614 label2 = gen_label_rtx ();
7616 code1 = code;
7617 code2 = swap_condition (code);
7618 code3 = unsigned_condition (code);
7620 switch (code)
7622 case LT: case GT: case LTU: case GTU:
7623 break;
7625 case LE: code1 = LT; code2 = GT; break;
7626 case GE: code1 = GT; code2 = LT; break;
7627 case LEU: code1 = LTU; code2 = GTU; break;
7628 case GEU: code1 = GTU; code2 = LTU; break;
7630 case EQ: code1 = NIL; code2 = NE; break;
7631 case NE: code2 = NIL; break;
7633 default:
7634 abort ();
7638 * a < b =>
7639 * if (hi(a) < hi(b)) goto true;
7640 * if (hi(a) > hi(b)) goto false;
7641 * if (lo(a) < lo(b)) goto true;
7642 * false:
7645 ix86_compare_op0 = hi[0];
7646 ix86_compare_op1 = hi[1];
7648 if (code1 != NIL)
7649 ix86_expand_branch (code1, label);
7650 if (code2 != NIL)
7651 ix86_expand_branch (code2, label2);
7653 ix86_compare_op0 = lo[0];
7654 ix86_compare_op1 = lo[1];
7655 ix86_expand_branch (code3, label);
7657 if (code2 != NIL)
7658 emit_label (label2);
7659 return;
7662 default:
7663 abort ();
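/* Example of the DImode equality trick above (illustrative): on
   32-bit x86, "a == b" for 64-bit a and b can be expanded roughly as

     xorl  hi(b), hi(a)
     xorl  lo(b), lo(a)
     orl   hi(a), lo(a)
     jz    .Ltrue

   which costs one extra insn but needs only a single branch.  */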
7667 /* Split branch based on floating point condition. */
7668 void
7669 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7670 enum rtx_code code;
7671 rtx op1, op2, target1, target2, tmp;
7673 rtx second, bypass;
7674 rtx label = NULL_RTX;
7675 rtx condition;
7676 int bypass_probability = -1, second_probability = -1, probability = -1;
7677 rtx i;
7679 if (target2 != pc_rtx)
7681 rtx tmp = target2;
7682 code = reverse_condition_maybe_unordered (code);
7683 target2 = target1;
7684 target1 = tmp;
7687 condition = ix86_expand_fp_compare (code, op1, op2,
7688 tmp, &second, &bypass);
7690 if (split_branch_probability >= 0)
7692 /* Distribute the probabilities across the jumps.
7693 Assume that BYPASS and SECOND always test
7694 for UNORDERED. */
7695 probability = split_branch_probability;
7697 /* A value of 1 is low enough that the probability does not need
7698 to be updated. Later we may run some experiments and see
7699 whether unordered values are more frequent in practice. */
7700 if (bypass)
7701 bypass_probability = 1;
7702 if (second)
7703 second_probability = 1;
7705 if (bypass != NULL_RTX)
7707 label = gen_label_rtx ();
7708 i = emit_jump_insn (gen_rtx_SET
7709 (VOIDmode, pc_rtx,
7710 gen_rtx_IF_THEN_ELSE (VOIDmode,
7711 bypass,
7712 gen_rtx_LABEL_REF (VOIDmode,
7713 label),
7714 pc_rtx)));
7715 if (bypass_probability >= 0)
7716 REG_NOTES (i)
7717 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7718 GEN_INT (bypass_probability),
7719 REG_NOTES (i));
7721 i = emit_jump_insn (gen_rtx_SET
7722 (VOIDmode, pc_rtx,
7723 gen_rtx_IF_THEN_ELSE (VOIDmode,
7724 condition, target1, target2)));
7725 if (probability >= 0)
7726 REG_NOTES (i)
7727 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7728 GEN_INT (probability),
7729 REG_NOTES (i));
7730 if (second != NULL_RTX)
7732 i = emit_jump_insn (gen_rtx_SET
7733 (VOIDmode, pc_rtx,
7734 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7735 target2)));
7736 if (second_probability >= 0)
7737 REG_NOTES (i)
7738 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7739 GEN_INT (second_probability),
7740 REG_NOTES (i));
7742 if (label != NULL_RTX)
7743 emit_label (label);
7747 ix86_expand_setcc (code, dest)
7748 enum rtx_code code;
7749 rtx dest;
7751 rtx ret, tmp, tmpreg;
7752 rtx second_test, bypass_test;
7753 int type;
7755 if (GET_MODE (ix86_compare_op0) == DImode
7756 && !TARGET_64BIT)
7757 return 0; /* FAIL */
7759 /* Three modes of generation:
7760 0 -- destination does not overlap compare sources:
7761 clear dest first, emit strict_low_part setcc.
7762 1 -- destination does overlap compare sources:
7763 emit subreg setcc, zero extend.
7764 2 -- destination is in QImode:
7765 emit setcc only.
7767 We don't use mode 0 early in compilation because it confuses CSE.
7768 There are peepholes to turn mode 1 into mode 0 if things work out
7769 nicely after reload. */
7771 type = cse_not_expected ? 0 : 1;
7773 if (GET_MODE (dest) == QImode)
7774 type = 2;
7775 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
7776 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
7777 type = 1;
7779 if (type == 0)
7780 emit_move_insn (dest, const0_rtx);
7782 ret = ix86_expand_compare (code, &second_test, &bypass_test);
7783 PUT_MODE (ret, QImode);
7785 tmp = dest;
7786 tmpreg = dest;
7787 if (type == 0)
7789 tmp = gen_lowpart (QImode, dest);
7790 tmpreg = tmp;
7791 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
7793 else if (type == 1)
7795 if (!cse_not_expected)
7796 tmp = gen_reg_rtx (QImode);
7797 else
7798 tmp = gen_lowpart (QImode, dest);
7799 tmpreg = tmp;
7802 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
7803 if (bypass_test || second_test)
7805 rtx test = second_test;
7806 int bypass = 0;
7807 rtx tmp2 = gen_reg_rtx (QImode);
7808 if (bypass_test)
7810 if (second_test)
7811 abort ();
7812 test = bypass_test;
7813 bypass = 1;
7814 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
7816 PUT_MODE (test, QImode);
7817 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
7819 if (bypass)
7820 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
7821 else
7822 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
7825 if (type == 1)
7827 rtx clob;
7829 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
7830 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
7831 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7832 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7833 emit_insn (tmp);
7836 return 1; /* DONE */
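/* For instance, mode 0 above (destination does not overlap the compare
   sources) produces roughly the following (illustrative):

     xorl  %eax, %eax
     cmpl  op1, op0
     setg  %al  */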
7840 ix86_expand_int_movcc (operands)
7841 rtx operands[];
7843 enum rtx_code code = GET_CODE (operands[1]), compare_code;
7844 rtx compare_seq, compare_op;
7845 rtx second_test, bypass_test;
7847 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
7848 When the comparison is done with an immediate, we can convert it to LTU
7849 or GEU by altering the integer. */
7851 if ((code == LEU || code == GTU)
7852 && GET_CODE (ix86_compare_op1) == CONST_INT
7853 && GET_MODE (operands[0]) != HImode
7854 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
7855 && GET_CODE (operands[2]) == CONST_INT
7856 && GET_CODE (operands[3]) == CONST_INT)
7858 if (code == LEU)
7859 code = LTU;
7860 else
7861 code = GEU;
7862 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
7865 start_sequence ();
7866 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
7867 compare_seq = gen_sequence ();
7868 end_sequence ();
7870 compare_code = GET_CODE (compare_op);
7872 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
7873 HImode insns, we'd be swallowed in word prefix ops. */
7875 if (GET_MODE (operands[0]) != HImode
7876 && (GET_MODE (operands[0]) != DImode || TARGET_64BIT)
7877 && GET_CODE (operands[2]) == CONST_INT
7878 && GET_CODE (operands[3]) == CONST_INT)
7880 rtx out = operands[0];
7881 HOST_WIDE_INT ct = INTVAL (operands[2]);
7882 HOST_WIDE_INT cf = INTVAL (operands[3]);
7883 HOST_WIDE_INT diff;
7885 if ((compare_code == LTU || compare_code == GEU)
7886 && !second_test && !bypass_test)
7889 /* Detect overlap between destination and compare sources. */
7890 rtx tmp = out;
7892 /* To simplify the rest of the code, restrict to the GEU case. */
7893 if (compare_code == LTU)
7895 int tmp = ct;
7896 ct = cf;
7897 cf = tmp;
7898 compare_code = reverse_condition (compare_code);
7899 code = reverse_condition (code);
7901 diff = ct - cf;
7903 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
7904 || reg_overlap_mentioned_p (out, ix86_compare_op1))
7905 tmp = gen_reg_rtx (GET_MODE (operands[0]));
7907 emit_insn (compare_seq);
7908 if (GET_MODE (tmp) == DImode)
7909 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
7910 else
7911 emit_insn (gen_x86_movsicc_0_m1 (tmp));
7913 if (diff == 1)
7916 * cmpl op0,op1
7917 * sbbl dest,dest
7918 * [addl dest, ct]
7920 * Size 5 - 8.
7922 if (ct)
7924 if (GET_MODE (tmp) == DImode)
7925 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (ct)));
7926 else
7927 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
7930 else if (cf == -1)
7933 * cmpl op0,op1
7934 * sbbl dest,dest
7935 * orl $ct, dest
7937 * Size 8.
7939 if (GET_MODE (tmp) == DImode)
7940 emit_insn (gen_iordi3 (tmp, tmp, GEN_INT (ct)));
7941 else
7942 emit_insn (gen_iorsi3 (tmp, tmp, GEN_INT (ct)));
7944 else if (diff == -1 && ct)
7947 * cmpl op0,op1
7948 * sbbl dest,dest
7949 * xorl $-1, dest
7950 * [addl dest, cf]
7952 * Size 8 - 11.
7954 if (GET_MODE (tmp) == DImode)
7956 emit_insn (gen_one_cmpldi2 (tmp, tmp));
7957 if (cf)
7958 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (cf)));
7960 else
7962 emit_insn (gen_one_cmplsi2 (tmp, tmp));
7963 if (cf)
7964 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (cf)));
7967 else
7970 * cmpl op0,op1
7971 * sbbl dest,dest
7972 * andl cf - ct, dest
7973 * [addl dest, ct]
7975 * Size 8 - 11.
7977 if (GET_MODE (tmp) == DImode)
7979 emit_insn (gen_anddi3 (tmp, tmp, GEN_INT (trunc_int_for_mode
7980 (cf - ct, DImode))));
7981 if (ct)
7982 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (ct)));
7984 else
7986 emit_insn (gen_andsi3 (tmp, tmp, GEN_INT (trunc_int_for_mode
7987 (cf - ct, SImode))));
7988 if (ct)
7989 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
7993 if (tmp != out)
7994 emit_move_insn (out, tmp);
7996 return 1; /* DONE */
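/* Worked example of the sbbl path above (illustrative):
   "dest = (a >= b unsigned) ? 10 : 3" becomes

     cmpl  b, a
     sbbl  dest, dest    ; 0 if a >= b, -1 otherwise
     andl  $-7, dest     ; cf - ct = 3 - 10
     addl  $10, dest     ; ct  */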
7999 diff = ct - cf;
8000 if (diff < 0)
8002 HOST_WIDE_INT tmp;
8003 tmp = ct, ct = cf, cf = tmp;
8004 diff = -diff;
8005 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8007 /* We may be reversing an unordered compare to a normal compare, which
8008 is not valid in general (we may convert a non-trapping condition
8009 into a trapping one); however, on i386 we currently emit all
8010 comparisons unordered. */
8011 compare_code = reverse_condition_maybe_unordered (compare_code);
8012 code = reverse_condition_maybe_unordered (code);
8014 else
8016 compare_code = reverse_condition (compare_code);
8017 code = reverse_condition (code);
8020 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
8021 || diff == 3 || diff == 5 || diff == 9)
8024 * xorl dest,dest
8025 * cmpl op1,op2
8026 * setcc dest
8027 * lea cf(dest*(ct-cf)),dest
8029 * Size 14.
8031 * This also catches the degenerate setcc-only case.
8034 rtx tmp;
8035 int nops;
8037 out = emit_store_flag (out, code, ix86_compare_op0,
8038 ix86_compare_op1, VOIDmode, 0, 1);
8040 nops = 0;
8041 /* On x86_64 the lea instruction operates on Pmode, so we need the
8042 arithmetic done in the proper mode to match. */
8043 if (diff == 1)
8044 tmp = out;
8045 else
8047 rtx out1;
8048 out1 = out;
8049 tmp = gen_rtx_MULT (GET_MODE (out), out1, GEN_INT (diff & ~1));
8050 nops++;
8051 if (diff & 1)
8053 tmp = gen_rtx_PLUS (GET_MODE (out), tmp, out1);
8054 nops++;
8057 if (cf != 0)
8059 tmp = gen_rtx_PLUS (GET_MODE (out), tmp, GEN_INT (cf));
8060 nops++;
8062 if (tmp != out
8063 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8065 if (nops == 1)
8067 rtx clob;
8069 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8070 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8072 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8073 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8074 emit_insn (tmp);
8076 else
8077 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8079 if (out != operands[0])
8080 emit_move_insn (operands[0], out);
8082 return 1; /* DONE */
8086 * General case: Jumpful:
8087 * xorl dest,dest cmpl op1, op2
8088 * cmpl op1, op2 movl ct, dest
8089 * setcc dest jcc 1f
8090 * decl dest movl cf, dest
8091 * andl (cf-ct),dest 1:
8092 * addl ct,dest
8094 * Size 20. Size 14.
8096 * This is reasonably steep, but branch mispredict costs are
8097 * high on modern CPUs, so consider failing only if optimizing
8098 * for space.
8100 * %%% Parameterize branch_cost on the tuning architecture, then
8101 * use that. The 80386 couldn't care less about mispredicts.
8104 if (!optimize_size && !TARGET_CMOVE)
8106 if (ct == 0)
8108 ct = cf;
8109 cf = 0;
8110 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8112 /* We may be reversing an unordered compare to a normal compare;
8113 that is not valid in general (we may convert a non-trapping
8114 condition to a trapping one), but on i386 we currently
8115 emit all comparisons unordered. */
8116 compare_code = reverse_condition_maybe_unordered (compare_code);
8117 code = reverse_condition_maybe_unordered (code);
8119 else
8121 compare_code = reverse_condition (compare_code);
8122 code = reverse_condition (code);
8126 out = emit_store_flag (out, code, ix86_compare_op0,
8127 ix86_compare_op1, VOIDmode, 0, 1);
8129 emit_insn (gen_addsi3 (out, out, constm1_rtx));
8130 emit_insn (gen_andsi3 (out, out, GEN_INT (trunc_int_for_mode
8131 (cf - ct, SImode))));
8132 if (ct != 0)
8133 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
8134 if (out != operands[0])
8135 emit_move_insn (operands[0], out);
8137 return 1; /* DONE */
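/* Tracing the jumpless sequence above: setcc gives {0, 1}; decl gives
{-1, 0}; andl $(cf - ct) gives {cf - ct, 0}; addl $ct gives
{cf, ct}. */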
8141 if (!TARGET_CMOVE)
8143 /* Try a few things more with specific constants and a variable. */
8145 optab op;
8146 rtx var, orig_out, out, tmp;
8148 if (optimize_size)
8149 return 0; /* FAIL */
8151 /* If one of the two operands is an interesting constant, load a
8152 constant with the above and mask it in with a logical operation. */
8154 if (GET_CODE (operands[2]) == CONST_INT)
8156 var = operands[3];
8157 if (INTVAL (operands[2]) == 0)
8158 operands[3] = constm1_rtx, op = and_optab;
8159 else if (INTVAL (operands[2]) == -1)
8160 operands[3] = const0_rtx, op = ior_optab;
8161 else
8162 return 0; /* FAIL */
8164 else if (GET_CODE (operands[3]) == CONST_INT)
8166 var = operands[2];
8167 if (INTVAL (operands[3]) == 0)
8168 operands[2] = constm1_rtx, op = and_optab;
8169 else if (INTVAL (operands[3]) == -1)
8170 operands[2] = const0_rtx, op = ior_optab;
8171 else
8172 return 0; /* FAIL */
8174 else
8175 return 0; /* FAIL */
8177 orig_out = operands[0];
8178 tmp = gen_reg_rtx (GET_MODE (orig_out));
8179 operands[0] = tmp;
8181 /* Recurse to get the constant loaded. */
8182 if (ix86_expand_int_movcc (operands) == 0)
8183 return 0; /* FAIL */
8185 /* Mask in the interesting variable. */
8186 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
8187 OPTAB_WIDEN);
8188 if (out != orig_out)
8189 emit_move_insn (orig_out, out);
8191 return 1; /* DONE */
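/* Sketch of the masking trick: for x = cond ? 0 : v the recursion
loads tmp = cond ? 0 : -1 and the expand_binop below computes
x = v & tmp; the cond ? -1 : v case uses tmp = cond ? -1 : 0 and an
inclusive or instead. */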
8195 * For comparison with above,
8197 * movl cf,dest
8198 * movl ct,tmp
8199 * cmpl op1,op2
8200 * cmovcc tmp,dest
8202 * Size 15.
8205 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
8206 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
8207 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
8208 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
8210 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8212 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
8213 emit_move_insn (tmp, operands[3]);
8214 operands[3] = tmp;
8216 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8218 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
8219 emit_move_insn (tmp, operands[2]);
8220 operands[2] = tmp;
8222 if (! register_operand (operands[2], VOIDmode)
8223 && ! register_operand (operands[3], VOIDmode))
8224 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
8226 emit_insn (compare_seq);
8227 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8228 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8229 compare_op, operands[2],
8230 operands[3])));
8231 if (bypass_test)
8232 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8233 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8234 bypass_test,
8235 operands[3],
8236 operands[0])));
8237 if (second_test)
8238 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8239 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8240 second_test,
8241 operands[2],
8242 operands[0])));
8244 return 1; /* DONE */
8248 ix86_expand_fp_movcc (operands)
8249 rtx operands[];
8251 enum rtx_code code;
8252 rtx tmp;
8253 rtx compare_op, second_test, bypass_test;
8255 /* For SF/DFmode conditional moves based on comparisons
8256 in the same mode, we may want to use SSE min/max instructions. */
8257 if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
8258 || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
8259 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
8260 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
8261 && (!TARGET_IEEE_FP
8262 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
8263 /* We may be called from the post-reload splitter. */
8264 && (!REG_P (operands[0])
8265 || SSE_REG_P (operands[0])
8266 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
8268 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
8269 code = GET_CODE (operands[1]);
8271 /* See if we have (cross) match between comparison operands and
8272 conditional move operands. */
8273 if (rtx_equal_p (operands[2], op1))
8275 rtx tmp = op0;
8276 op0 = op1;
8277 op1 = tmp;
8278 code = reverse_condition_maybe_unordered (code);
8280 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
8282 /* Check for min operation. */
8283 if (code == LT)
8285 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8286 if (memory_operand (op0, VOIDmode))
8287 op0 = force_reg (GET_MODE (operands[0]), op0);
8288 if (GET_MODE (operands[0]) == SFmode)
8289 emit_insn (gen_minsf3 (operands[0], op0, op1));
8290 else
8291 emit_insn (gen_mindf3 (operands[0], op0, op1));
8292 return 1;
8294 /* Check for max operation. */
8295 if (code == GT)
8297 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8298 if (memory_operand (op0, VOIDmode))
8299 op0 = force_reg (GET_MODE (operands[0]), op0);
8300 if (GET_MODE (operands[0]) == SFmode)
8301 emit_insn (gen_maxsf3 (operands[0], op0, op1));
8302 else
8303 emit_insn (gen_maxdf3 (operands[0], op0, op1));
8304 return 1;
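/* E.g. x = a < b ? a : b with SFmode operands in SSE registers maps
directly to a single minss; the GT case maps to maxss/maxsd. */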
8307 /* Manage the condition to be sse_comparison_operator. In case we are
8308 in non-IEEE mode, try to canonicalize the destination operand
8309 to be first in the comparison - this helps reload to avoid extra
8310 moves. */
8311 if (!sse_comparison_operator (operands[1], VOIDmode)
8312 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
8314 rtx tmp = ix86_compare_op0;
8315 ix86_compare_op0 = ix86_compare_op1;
8316 ix86_compare_op1 = tmp;
8317 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
8318 VOIDmode, ix86_compare_op0,
8319 ix86_compare_op1);
8321 /* Similarly, try to manage the result to be the first operand of the
8322 conditional move. We also don't support the NE comparison on SSE,
8323 so try to avoid it. */
8324 if ((rtx_equal_p (operands[0], operands[3])
8325 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
8326 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
8328 rtx tmp = operands[2];
8329 operands[2] = operands[3];
8330 operands[3] = tmp;
8331 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8332 (GET_CODE (operands[1])),
8333 VOIDmode, ix86_compare_op0,
8334 ix86_compare_op1);
8336 if (GET_MODE (operands[0]) == SFmode)
8337 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
8338 operands[2], operands[3],
8339 ix86_compare_op0, ix86_compare_op1));
8340 else
8341 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
8342 operands[2], operands[3],
8343 ix86_compare_op0, ix86_compare_op1));
8344 return 1;
8347 /* The floating point conditional move instructions don't directly
8348 support conditions resulting from a signed integer comparison. */
8350 code = GET_CODE (operands[1]);
8351 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8353 /* The floating point conditional move instructions don't directly
8354 support signed integer comparisons. */
8356 if (!fcmov_comparison_operator (compare_op, VOIDmode))
8358 if (second_test != NULL || bypass_test != NULL)
8359 abort ();
8360 tmp = gen_reg_rtx (QImode);
8361 ix86_expand_setcc (code, tmp);
8362 code = NE;
8363 ix86_compare_op0 = tmp;
8364 ix86_compare_op1 = const0_rtx;
8365 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8367 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8369 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8370 emit_move_insn (tmp, operands[3]);
8371 operands[3] = tmp;
8373 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8375 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8376 emit_move_insn (tmp, operands[2]);
8377 operands[2] = tmp;
8380 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8381 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8382 compare_op,
8383 operands[2],
8384 operands[3])));
8385 if (bypass_test)
8386 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8387 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8388 bypass_test,
8389 operands[3],
8390 operands[0])));
8391 if (second_test)
8392 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8393 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8394 second_test,
8395 operands[2],
8396 operands[0])));
8398 return 1;
8401 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
8402 works for floating point parameters and non-offsettable memories.
8403 For pushes, it returns just stack offsets; the values will be saved
8404 in the right order. At most three parts are generated. */
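/* For example, on a 32-bit target a DFmode operand yields two SImode
parts and XFmode/TFmode yield three, while on a 64-bit target
XFmode/TFmode split into a DImode part plus an SImode part. */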
8406 static int
8407 ix86_split_to_parts (operand, parts, mode)
8408 rtx operand;
8409 rtx *parts;
8410 enum machine_mode mode;
8412 int size;
8414 if (!TARGET_64BIT)
8415 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8416 else
8417 size = (GET_MODE_SIZE (mode) + 4) / 8;
8419 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8420 abort ();
8421 if (size < 2 || size > 3)
8422 abort ();
8424 /* Optimize constant pool references to immediates. This is used by fp
8425 moves that force all constants to memory to allow combining. */
8427 if (GET_CODE (operand) == MEM
8428 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8429 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8430 operand = get_pool_constant (XEXP (operand, 0));
8432 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
8434 /* The only non-offsettable memories we handle are pushes. */
8435 if (! push_operand (operand, VOIDmode))
8436 abort ();
8438 operand = copy_rtx (operand);
8439 PUT_MODE (operand, Pmode);
8440 parts[0] = parts[1] = parts[2] = operand;
8442 else if (!TARGET_64BIT)
8444 if (mode == DImode)
8445 split_di (&operand, 1, &parts[0], &parts[1]);
8446 else
8448 if (REG_P (operand))
8450 if (!reload_completed)
8451 abort ();
8452 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8453 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8454 if (size == 3)
8455 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8457 else if (offsettable_memref_p (operand))
8459 operand = adjust_address (operand, SImode, 0);
8460 parts[0] = operand;
8461 parts[1] = adjust_address (operand, SImode, 4);
8462 if (size == 3)
8463 parts[2] = adjust_address (operand, SImode, 8);
8465 else if (GET_CODE (operand) == CONST_DOUBLE)
8467 REAL_VALUE_TYPE r;
8468 long l[4];
8470 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8471 switch (mode)
8473 case XFmode:
8474 case TFmode:
8475 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8476 parts[2] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8477 break;
8478 case DFmode:
8479 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8480 break;
8481 default:
8482 abort ();
8484 parts[1] = GEN_INT (trunc_int_for_mode (l[1], SImode));
8485 parts[0] = GEN_INT (trunc_int_for_mode (l[0], SImode));
8487 else
8488 abort ();
8491 else
8493 if (mode == XFmode || mode == TFmode)
8495 if (REG_P (operand))
8497 if (!reload_completed)
8498 abort ();
8499 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8500 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8502 else if (offsettable_memref_p (operand))
8504 operand = adjust_address (operand, DImode, 0);
8505 parts[0] = operand;
8506 parts[1] = adjust_address (operand, SImode, 8);
8508 else if (GET_CODE (operand) == CONST_DOUBLE)
8510 REAL_VALUE_TYPE r;
8511 long l[3];
8513 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8514 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8515 /* Do not use shift by 32 to avoid warning on 32bit systems. */
8516 if (HOST_BITS_PER_WIDE_INT >= 64)
8517 parts[0]
8518 = GEN_INT (trunc_int_for_mode (l[0] + ((l[1] << 31) << 1),
8519 SImode));
8520 else
8521 parts[0] = immed_double_const (l[0], l[1], DImode);
8522 parts[1] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8524 else
8525 abort ();
8529 return size;
8532 /* Emit insns to perform a move or push of DI, DF, and XF values.
8533 Return false when normal moves are needed; true when all required
8534 insns have been emitted. Operands 2-4 contain the input values
8535 in the correct order; operands 5-7 contain the output values. */
8537 void
8538 ix86_split_long_move (operands)
8539 rtx operands[];
8541 rtx part[2][3];
8542 int nparts;
8543 int push = 0;
8544 int collisions = 0;
8545 enum machine_mode mode = GET_MODE (operands[0]);
8547 /* The DFmode expanders may ask us to move a double.
8548 For a 64-bit target this is a single move. By hiding the fact
8549 here we simplify the i386.md splitters. */
8550 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8552 /* Optimize constant pool references to immediates. This is used by fp
8553 moves that force all constants to memory to allow combining. */
8555 if (GET_CODE (operands[1]) == MEM
8556 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8557 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8558 operands[1] = get_pool_constant (XEXP (operands[1], 0));
8559 if (push_operand (operands[0], VOIDmode))
8561 operands[0] = copy_rtx (operands[0]);
8562 PUT_MODE (operands[0], Pmode);
8564 else
8565 operands[0] = gen_lowpart (DImode, operands[0]);
8566 operands[1] = gen_lowpart (DImode, operands[1]);
8567 emit_move_insn (operands[0], operands[1]);
8568 return;
8571 /* The only non-offsettable memory we handle is a push. */
8572 if (push_operand (operands[0], VOIDmode))
8573 push = 1;
8574 else if (GET_CODE (operands[0]) == MEM
8575 && ! offsettable_memref_p (operands[0]))
8576 abort ();
8578 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8579 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
8581 /* When emitting a push, take care with source operands on the stack. */
8582 if (push && GET_CODE (operands[1]) == MEM
8583 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
8585 if (nparts == 3)
8586 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8587 XEXP (part[1][2], 0));
8588 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8589 XEXP (part[1][1], 0));
8592 /* We need to do the copy in the right order in case an address register
8593 of the source overlaps the destination. */
8594 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
8596 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
8597 collisions++;
8598 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8599 collisions++;
8600 if (nparts == 3
8601 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
8602 collisions++;
8604 /* Collision in the middle part can be handled by reordering. */
8605 if (collisions == 1 && nparts == 3
8606 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8608 rtx tmp;
8609 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
8610 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
8613 /* If there are more collisions, we can't handle it by reordering.
8614 Do an lea to the last part and use only one colliding move. */
8615 else if (collisions > 1)
8617 collisions = 1;
8618 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
8619 XEXP (part[1][0], 0)));
8620 part[1][0] = change_address (part[1][0],
8621 TARGET_64BIT ? DImode : SImode,
8622 part[0][nparts - 1]);
8623 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
8624 if (nparts == 3)
8625 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
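/* For instance, when moving a DImode value whose source address
register is also the low destination register, the reordering (or
the lea rebase above) ensures the colliding low-part move is emitted
last, after the address is no longer needed. */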
8629 if (push)
8631 if (!TARGET_64BIT)
8633 if (nparts == 3)
8635 /* We use only the first 12 bytes of the TFmode value, but for pushing
8636 we are required to adjust the stack as if we were pushing a real
8637 16-byte value. */
8638 if (mode == TFmode && !TARGET_64BIT)
8639 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
8640 GEN_INT (-4)));
8641 emit_move_insn (part[0][2], part[1][2]);
8644 else
8646 /* In 64-bit mode we don't have a 32-bit push available. In case this
8647 is a register, it is OK - we will just use the larger counterpart.
8648 We also retype memory - this comes from an attempt to avoid the REX
8649 prefix on moving the second half of a TFmode value. */
8650 if (GET_MODE (part[1][1]) == SImode)
8652 if (GET_CODE (part[1][1]) == MEM)
8653 part[1][1] = adjust_address (part[1][1], DImode, 0);
8654 else if (REG_P (part[1][1]))
8655 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
8656 else
8657 abort ();
8658 if (GET_MODE (part[1][0]) == SImode)
8659 part[1][0] = part[1][1];
8662 emit_move_insn (part[0][1], part[1][1]);
8663 emit_move_insn (part[0][0], part[1][0]);
8664 return;
8667 /* Choose the correct order so we do not overwrite the source before it is copied. */
8668 if ((REG_P (part[0][0])
8669 && REG_P (part[1][1])
8670 && (REGNO (part[0][0]) == REGNO (part[1][1])
8671 || (nparts == 3
8672 && REGNO (part[0][0]) == REGNO (part[1][2]))))
8673 || (collisions > 0
8674 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
8676 if (nparts == 3)
8678 operands[2] = part[0][2];
8679 operands[3] = part[0][1];
8680 operands[4] = part[0][0];
8681 operands[5] = part[1][2];
8682 operands[6] = part[1][1];
8683 operands[7] = part[1][0];
8685 else
8687 operands[2] = part[0][1];
8688 operands[3] = part[0][0];
8689 operands[5] = part[1][1];
8690 operands[6] = part[1][0];
8693 else
8695 if (nparts == 3)
8697 operands[2] = part[0][0];
8698 operands[3] = part[0][1];
8699 operands[4] = part[0][2];
8700 operands[5] = part[1][0];
8701 operands[6] = part[1][1];
8702 operands[7] = part[1][2];
8704 else
8706 operands[2] = part[0][0];
8707 operands[3] = part[0][1];
8708 operands[5] = part[1][0];
8709 operands[6] = part[1][1];
8712 emit_move_insn (operands[2], operands[5]);
8713 emit_move_insn (operands[3], operands[6]);
8714 if (nparts == 3)
8715 emit_move_insn (operands[4], operands[7]);
8717 return;
8720 void
8721 ix86_split_ashldi (operands, scratch)
8722 rtx *operands, scratch;
8724 rtx low[2], high[2];
8725 int count;
8727 if (GET_CODE (operands[2]) == CONST_INT)
8729 split_di (operands, 2, low, high);
8730 count = INTVAL (operands[2]) & 63;
8732 if (count >= 32)
8734 emit_move_insn (high[0], low[1]);
8735 emit_move_insn (low[0], const0_rtx);
8737 if (count > 32)
8738 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
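/* E.g. count == 40: the move above makes high = low and the 8-bit
shift finishes high = low << 8, while the low word is simply
zeroed. */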
8740 else
8742 if (!rtx_equal_p (operands[0], operands[1]))
8743 emit_move_insn (operands[0], operands[1]);
8744 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
8745 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
8748 else
8750 if (!rtx_equal_p (operands[0], operands[1]))
8751 emit_move_insn (operands[0], operands[1]);
8753 split_di (operands, 1, low, high);
8755 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
8756 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
8758 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8760 if (! no_new_pseudos)
8761 scratch = force_reg (SImode, const0_rtx);
8762 else
8763 emit_move_insn (scratch, const0_rtx);
8765 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
8766 scratch));
8768 else
8769 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
8773 void
8774 ix86_split_ashrdi (operands, scratch)
8775 rtx *operands, scratch;
8777 rtx low[2], high[2];
8778 int count;
8780 if (GET_CODE (operands[2]) == CONST_INT)
8782 split_di (operands, 2, low, high);
8783 count = INTVAL (operands[2]) & 63;
8785 if (count >= 32)
8787 emit_move_insn (low[0], high[1]);
8789 if (! reload_completed)
8790 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
8791 else
8793 emit_move_insn (high[0], low[0]);
8794 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
8797 if (count > 32)
8798 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
8800 else
8802 if (!rtx_equal_p (operands[0], operands[1]))
8803 emit_move_insn (operands[0], operands[1]);
8804 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8805 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
8808 else
8810 if (!rtx_equal_p (operands[0], operands[1]))
8811 emit_move_insn (operands[0], operands[1]);
8813 split_di (operands, 1, low, high);
8815 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8816 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
8818 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8820 if (! no_new_pseudos)
8821 scratch = gen_reg_rtx (SImode);
8822 emit_move_insn (scratch, high[0]);
8823 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
8824 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8825 scratch));
8827 else
8828 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
8832 void
8833 ix86_split_lshrdi (operands, scratch)
8834 rtx *operands, scratch;
8836 rtx low[2], high[2];
8837 int count;
8839 if (GET_CODE (operands[2]) == CONST_INT)
8841 split_di (operands, 2, low, high);
8842 count = INTVAL (operands[2]) & 63;
8844 if (count >= 32)
8846 emit_move_insn (low[0], high[1]);
8847 emit_move_insn (high[0], const0_rtx);
8849 if (count > 32)
8850 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
8852 else
8854 if (!rtx_equal_p (operands[0], operands[1]))
8855 emit_move_insn (operands[0], operands[1]);
8856 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8857 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
8860 else
8862 if (!rtx_equal_p (operands[0], operands[1]))
8863 emit_move_insn (operands[0], operands[1]);
8865 split_di (operands, 1, low, high);
8867 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8868 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
8870 /* Heh. By reversing the arguments, we can reuse this pattern. */
8871 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8873 if (! no_new_pseudos)
8874 scratch = force_reg (SImode, const0_rtx);
8875 else
8876 emit_move_insn (scratch, const0_rtx);
8878 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8879 scratch));
8881 else
8882 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
8886 /* Helper function for the string operations below. Test whether
8887 VARIABLE is aligned to VALUE bytes; if it is, jump to the label. */
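/* E.g. ix86_expand_aligntest (destreg, 1) jumps to the returned label
when (destreg & 1) == 0, i.e. when the pointer is already 2-byte
aligned and the caller's single-byte fixup can be skipped. */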
8888 static rtx
8889 ix86_expand_aligntest (variable, value)
8890 rtx variable;
8891 int value;
8893 rtx label = gen_label_rtx ();
8894 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
8895 if (GET_MODE (variable) == DImode)
8896 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
8897 else
8898 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
8899 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
8900 1, 0, label);
8901 return label;
8904 /* Adjust COUNTER by the VALUE. */
8905 static void
8906 ix86_adjust_counter (countreg, value)
8907 rtx countreg;
8908 HOST_WIDE_INT value;
8910 if (GET_MODE (countreg) == DImode)
8911 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
8912 else
8913 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
8916 /* Zero-extend a possibly-SImode EXP to a Pmode register. */
8918 ix86_zero_extend_to_Pmode (exp)
8919 rtx exp;
8921 rtx r;
8922 if (GET_MODE (exp) == VOIDmode)
8923 return force_reg (Pmode, exp);
8924 if (GET_MODE (exp) == Pmode)
8925 return copy_to_mode_reg (Pmode, exp);
8926 r = gen_reg_rtx (Pmode);
8927 emit_insn (gen_zero_extendsidi2 (r, exp));
8928 return r;
8931 /* Expand string move (memcpy) operation. Use i386 string operations when
8932 profitable. expand_clrstr contains similar code. */
8934 ix86_expand_movstr (dst, src, count_exp, align_exp)
8935 rtx dst, src, count_exp, align_exp;
8937 rtx srcreg, destreg, countreg;
8938 enum machine_mode counter_mode;
8939 HOST_WIDE_INT align = 0;
8940 unsigned HOST_WIDE_INT count = 0;
8941 rtx insns;
8943 start_sequence ();
8945 if (GET_CODE (align_exp) == CONST_INT)
8946 align = INTVAL (align_exp);
8948 /* This simple hack avoids all inlining code and simplifies the code below. */
8949 if (!TARGET_ALIGN_STRINGOPS)
8950 align = 64;
8952 if (GET_CODE (count_exp) == CONST_INT)
8953 count = INTVAL (count_exp);
8955 /* Figure out the proper mode for the counter. For 32 bits it is always
8956 SImode; for 64 bits use SImode when possible, otherwise DImode.
8957 Set count to the number of bytes copied when known at compile time. */
8958 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
8959 || x86_64_zero_extended_value (count_exp))
8960 counter_mode = SImode;
8961 else
8962 counter_mode = DImode;
8964 if (counter_mode != SImode && counter_mode != DImode)
8965 abort ();
8967 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
8968 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
8970 emit_insn (gen_cld ());
8972 /* When optimizing for size, emit a simple rep ; movsb instruction for
8973 counts not divisible by 4. */
8975 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
8977 countreg = ix86_zero_extend_to_Pmode (count_exp);
8978 if (TARGET_64BIT)
8979 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
8980 destreg, srcreg, countreg));
8981 else
8982 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
8983 destreg, srcreg, countreg));
8986 /* For constant aligned (or small unaligned) copies, use rep movsl
8987 followed by code copying the rest. For PentiumPro, ensure 8-byte
8988 alignment to allow rep movsl acceleration. */
8990 else if (count != 0
8991 && (align >= 8
8992 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
8993 || optimize_size || count < (unsigned int)64))
8995 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
8996 if (count & ~(size - 1))
8998 countreg = copy_to_mode_reg (counter_mode,
8999 GEN_INT ((count >> (size == 4 ? 2 : 3))
9000 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9001 countreg = ix86_zero_extend_to_Pmode (countreg);
9002 if (size == 4)
9004 if (TARGET_64BIT)
9005 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9006 destreg, srcreg, countreg));
9007 else
9008 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9009 destreg, srcreg, countreg));
9011 else
9012 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9013 destreg, srcreg, countreg));
9015 if (size == 8 && (count & 0x04))
9016 emit_insn (gen_strmovsi (destreg, srcreg));
9017 if (count & 0x02)
9018 emit_insn (gen_strmovhi (destreg, srcreg));
9019 if (count & 0x01)
9020 emit_insn (gen_strmovqi (destreg, srcreg));
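/* E.g. count = 7 with size = 4: rep movsl copies one dword and the
strmovhi/strmovqi tail copies the remaining 2 + 1 bytes. */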
9022 /* The generic code based on the glibc implementation:
9023 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9024 allowing accelerated copying there)
9025 - copy the data using rep movsl
9026 - copy the rest. */
9027 else
9029 rtx countreg2;
9030 rtx label = NULL;
9032 /* In case we don't know anything about the alignment, default to the
9033 library version, since it is usually equally fast and results in
9034 shorter code. */
9035 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9037 end_sequence ();
9038 return 0;
9041 if (TARGET_SINGLE_STRINGOP)
9042 emit_insn (gen_cld ());
9044 countreg2 = gen_reg_rtx (Pmode);
9045 countreg = copy_to_mode_reg (counter_mode, count_exp);
9047 /* We don't use loops to align the destination or to copy parts smaller
9048 than 4 bytes, because gcc is able to optimize such code better (in
9049 the case the destination or the count really is aligned, gcc is often
9050 able to predict the branches) and also it is friendlier to the
9051 hardware branch prediction.
9053 Using loops is beneficial for the generic case, because we can
9054 handle small counts using the loops. Many CPUs (such as the Athlon)
9055 have large REP prefix setup costs.
9057 This is quite costly. Maybe we can revisit this decision later or
9058 add some customizability to this code. */
9060 if (count == 0
9061 && align < (TARGET_PENTIUMPRO && (count == 0
9062 || count >= (unsigned int)260)
9063 ? 8 : UNITS_PER_WORD))
9065 label = gen_label_rtx ();
9066 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9067 LEU, 0, counter_mode, 1, 0, label);
9069 if (align <= 1)
9071 rtx label = ix86_expand_aligntest (destreg, 1);
9072 emit_insn (gen_strmovqi (destreg, srcreg));
9073 ix86_adjust_counter (countreg, 1);
9074 emit_label (label);
9075 LABEL_NUSES (label) = 1;
9077 if (align <= 2)
9079 rtx label = ix86_expand_aligntest (destreg, 2);
9080 emit_insn (gen_strmovhi (destreg, srcreg));
9081 ix86_adjust_counter (countreg, 2);
9082 emit_label (label);
9083 LABEL_NUSES (label) = 1;
9085 if (align <= 4
9086 && ((TARGET_PENTIUMPRO && (count == 0
9087 || count >= (unsigned int)260))
9088 || TARGET_64BIT))
9090 rtx label = ix86_expand_aligntest (destreg, 4);
9091 emit_insn (gen_strmovsi (destreg, srcreg));
9092 ix86_adjust_counter (countreg, 4);
9093 emit_label (label);
9094 LABEL_NUSES (label) = 1;
9097 if (!TARGET_SINGLE_STRINGOP)
9098 emit_insn (gen_cld ());
9099 if (TARGET_64BIT)
9101 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9102 GEN_INT (3)));
9103 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9104 destreg, srcreg, countreg2));
9106 else
9108 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9109 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9110 destreg, srcreg, countreg2));
9113 if (label)
9115 emit_label (label);
9116 LABEL_NUSES (label) = 1;
9118 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9119 emit_insn (gen_strmovsi (destreg, srcreg));
9120 if ((align <= 4 || count == 0) && TARGET_64BIT)
9122 rtx label = ix86_expand_aligntest (countreg, 4);
9123 emit_insn (gen_strmovsi (destreg, srcreg));
9124 emit_label (label);
9125 LABEL_NUSES (label) = 1;
9127 if (align > 2 && count != 0 && (count & 2))
9128 emit_insn (gen_strmovhi (destreg, srcreg));
9129 if (align <= 2 || count == 0)
9131 rtx label = ix86_expand_aligntest (countreg, 2);
9132 emit_insn (gen_strmovhi (destreg, srcreg));
9133 emit_label (label);
9134 LABEL_NUSES (label) = 1;
9136 if (align > 1 && count != 0 && (count & 1))
9137 emit_insn (gen_strmovqi (destreg, srcreg));
9138 if (align <= 1 || count == 0)
9140 rtx label = ix86_expand_aligntest (countreg, 1);
9141 emit_insn (gen_strmovqi (destreg, srcreg));
9142 emit_label (label);
9143 LABEL_NUSES (label) = 1;
9147 insns = get_insns ();
9148 end_sequence ();
9150 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9151 emit_insns (insns);
9152 return 1;
9155 /* Expand string clear operation (bzero). Use i386 string operations when
9156 profitable. expand_movstr contains similar code. */
9158 ix86_expand_clrstr (src, count_exp, align_exp)
9159 rtx src, count_exp, align_exp;
9161 rtx destreg, zeroreg, countreg;
9162 enum machine_mode counter_mode;
9163 HOST_WIDE_INT align = 0;
9164 unsigned HOST_WIDE_INT count = 0;
9166 if (GET_CODE (align_exp) == CONST_INT)
9167 align = INTVAL (align_exp);
9169 /* This simple hack avoids all inlining code and simplifies the code below. */
9170 if (!TARGET_ALIGN_STRINGOPS)
9171 align = 32;
9173 if (GET_CODE (count_exp) == CONST_INT)
9174 count = INTVAL (count_exp);
9175 /* Figure out the proper mode for the counter. For 32 bits it is always
9176 SImode; for 64 bits use SImode when possible, otherwise DImode.
9177 Set count to the number of bytes copied when known at compile time. */
9178 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9179 || x86_64_zero_extended_value (count_exp))
9180 counter_mode = SImode;
9181 else
9182 counter_mode = DImode;
9184 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9186 emit_insn (gen_cld ());
9188 /* When optimizing for size, emit a simple rep ; movsb instruction for
9189 counts not divisible by 4. */
9191 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9193 countreg = ix86_zero_extend_to_Pmode (count_exp);
9194 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9195 if (TARGET_64BIT)
9196 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9197 destreg, countreg));
9198 else
9199 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9200 destreg, countreg));
9202 else if (count != 0
9203 && (align >= 8
9204 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9205 || optimize_size || count < (unsigned int)64))
9207 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9208 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9209 if (count & ~(size - 1))
9211 countreg = copy_to_mode_reg (counter_mode,
9212 GEN_INT ((count >> (size == 4 ? 2 : 3))
9213 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9214 countreg = ix86_zero_extend_to_Pmode (countreg);
9215 if (size == 4)
9217 if (TARGET_64BIT)
9218 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9219 destreg, countreg));
9220 else
9221 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9222 destreg, countreg));
9224 else
9225 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9226 destreg, countreg));
9228 if (size == 8 && (count & 0x04))
9229 emit_insn (gen_strsetsi (destreg,
9230 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9231 if (count & 0x02)
9232 emit_insn (gen_strsethi (destreg,
9233 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9234 if (count & 0x01)
9235 emit_insn (gen_strsetqi (destreg,
9236 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9238 else
9240 rtx countreg2;
9241 rtx label = NULL;
9243 /* In case we don't know anything about the alignment, default to the
9244 library version, since it is usually equally fast and results in
9245 shorter code. */
9246 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9247 return 0;
9249 if (TARGET_SINGLE_STRINGOP)
9250 emit_insn (gen_cld ());
9252 countreg2 = gen_reg_rtx (Pmode);
9253 countreg = copy_to_mode_reg (counter_mode, count_exp);
9254 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9256 if (count == 0
9257 && align < (TARGET_PENTIUMPRO && (count == 0
9258 || count >= (unsigned int)260)
9259 ? 8 : UNITS_PER_WORD))
9261 label = gen_label_rtx ();
9262 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9263 LEU, 0, counter_mode, 1, 0, label);
9265 if (align <= 1)
9267 rtx label = ix86_expand_aligntest (destreg, 1);
9268 emit_insn (gen_strsetqi (destreg,
9269 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9270 ix86_adjust_counter (countreg, 1);
9271 emit_label (label);
9272 LABEL_NUSES (label) = 1;
9274 if (align <= 2)
9276 rtx label = ix86_expand_aligntest (destreg, 2);
9277 emit_insn (gen_strsethi (destreg,
9278 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9279 ix86_adjust_counter (countreg, 2);
9280 emit_label (label);
9281 LABEL_NUSES (label) = 1;
9283 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
9284 || count >= (unsigned int)260))
9286 rtx label = ix86_expand_aligntest (destreg, 4);
9287 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9288 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9289 : zeroreg)));
9290 ix86_adjust_counter (countreg, 4);
9291 emit_label (label);
9292 LABEL_NUSES (label) = 1;
9295 if (!TARGET_SINGLE_STRINGOP)
9296 emit_insn (gen_cld ());
9297 if (TARGET_64BIT)
9299 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9300 GEN_INT (3)));
9301 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9302 destreg, countreg2));
9304 else
9306 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9307 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9308 destreg, countreg2));
9311 if (label)
9313 emit_label (label);
9314 LABEL_NUSES (label) = 1;
9316 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9317 emit_insn (gen_strsetsi (destreg,
9318 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9319 if (TARGET_64BIT && (align <= 4 || count == 0))
9321 rtx label = ix86_expand_aligntest (destreg, 2);
9322 emit_insn (gen_strsetsi (destreg,
9323 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9324 emit_label (label);
9325 LABEL_NUSES (label) = 1;
9327 if (align > 2 && count != 0 && (count & 2))
9328 emit_insn (gen_strsethi (destreg,
9329 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9330 if (align <= 2 || count == 0)
9332 rtx label = ix86_expand_aligntest (destreg, 2);
9333 emit_insn (gen_strsethi (destreg,
9334 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9335 emit_label (label);
9336 LABEL_NUSES (label) = 1;
9338 if (align > 1 && count != 0 && (count & 1))
9339 emit_insn (gen_strsetqi (destreg,
9340 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9341 if (align <= 1 || count == 0)
9343 rtx label = ix86_expand_aligntest (destreg, 1);
9344 emit_insn (gen_strsetqi (destreg,
9345 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9346 emit_label (label);
9347 LABEL_NUSES (label) = 1;
9350 return 1;
9352 /* Expand strlen. */
9354 ix86_expand_strlen (out, src, eoschar, align)
9355 rtx out, src, eoschar, align;
9357 rtx addr, scratch1, scratch2, scratch3, scratch4;
9359 /* The generic case of the strlen expander is long. Avoid expanding
9360 it unless TARGET_INLINE_ALL_STRINGOPS. */
9362 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9363 && !TARGET_INLINE_ALL_STRINGOPS
9364 && !optimize_size
9365 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
9366 return 0;
9368 addr = force_reg (Pmode, XEXP (src, 0));
9369 scratch1 = gen_reg_rtx (Pmode);
9371 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9372 && !optimize_size)
9374 /* Well it seems that some optimizer does not combine a call like
9375 foo(strlen(bar), strlen(bar));
9376 when the move and the subtraction are done here. It does calculate
9377 the length just once when these instructions are done inside of
9378 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
9379 often used and I use one fewer register for the lifetime of
9380 output_strlen_unroll() this is better. */
9382 emit_move_insn (out, addr);
9384 ix86_expand_strlensi_unroll_1 (out, align);
9386 /* strlensi_unroll_1 returns the address of the zero at the end of
9387 the string, like memchr(), so compute the length by subtracting
9388 the start address. */
9389 if (TARGET_64BIT)
9390 emit_insn (gen_subdi3 (out, out, addr));
9391 else
9392 emit_insn (gen_subsi3 (out, out, addr));
9394 else
9396 scratch2 = gen_reg_rtx (Pmode);
9397 scratch3 = gen_reg_rtx (Pmode);
9398 scratch4 = force_reg (Pmode, constm1_rtx);
9400 emit_move_insn (scratch3, addr);
9401 eoschar = force_reg (QImode, eoschar);
9403 emit_insn (gen_cld ());
9404 if (TARGET_64BIT)
9406 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
9407 align, scratch4, scratch3));
9408 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
9409 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
9411 else
9413 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
9414 align, scratch4, scratch3));
9415 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
9416 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
9419 return 1;
9422 /* Expand the appropriate insns for doing strlen if not just doing
9423 repnz; scasb
9425 out = result, initialized with the start address
9426 align_rtx = alignment of the address.
9427 scratch = scratch register, initialized with the start address when
9428 not aligned, otherwise undefined
9430 This is just the body. It needs the initializations mentioned above and
9431 some address computing at the end. These things are done in i386.md. */
9433 static void
9434 ix86_expand_strlensi_unroll_1 (out, align_rtx)
9435 rtx out, align_rtx;
9437 int align;
9438 rtx tmp;
9439 rtx align_2_label = NULL_RTX;
9440 rtx align_3_label = NULL_RTX;
9441 rtx align_4_label = gen_label_rtx ();
9442 rtx end_0_label = gen_label_rtx ();
9443 rtx mem;
9444 rtx tmpreg = gen_reg_rtx (SImode);
9445 rtx scratch = gen_reg_rtx (SImode);
9447 align = 0;
9448 if (GET_CODE (align_rtx) == CONST_INT)
9449 align = INTVAL (align_rtx);
9451 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
9453 /* Is there a known alignment and is it less than 4? */
9454 if (align < 4)
9456 rtx scratch1 = gen_reg_rtx (Pmode);
9457 emit_move_insn (scratch1, out);
9458 /* Is there a known alignment and is it not 2? */
9459 if (align != 2)
9461 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
9462 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
9464 /* Leave just the 3 lower bits. */
9465 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
9466 NULL_RTX, 0, OPTAB_WIDEN);
9468 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9469 Pmode, 1, 0, align_4_label);
9470 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
9471 Pmode, 1, 0, align_2_label);
9472 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
9473 Pmode, 1, 0, align_3_label);
9475 else
9477 /* Since the alignment is 2, we have to check 2 or 0 bytes;
9478 check whether it is aligned to 4 bytes. */
9480 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
9481 NULL_RTX, 0, OPTAB_WIDEN);
9483 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9484 Pmode, 1, 0, align_4_label);
9487 mem = gen_rtx_MEM (QImode, out);
9489 /* Now compare the bytes. */
9491 /* Compare the first n unaligned bytes on a byte-per-byte basis. */
9492 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9493 QImode, 1, 0, end_0_label);
9495 /* Increment the address. */
9496 if (TARGET_64BIT)
9497 emit_insn (gen_adddi3 (out, out, const1_rtx));
9498 else
9499 emit_insn (gen_addsi3 (out, out, const1_rtx));
9501 /* Not needed with an alignment of 2. */
9502 if (align != 2)
9504 emit_label (align_2_label);
9506 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9507 QImode, 1, 0, end_0_label);
9509 if (TARGET_64BIT)
9510 emit_insn (gen_adddi3 (out, out, const1_rtx));
9511 else
9512 emit_insn (gen_addsi3 (out, out, const1_rtx));
9514 emit_label (align_3_label);
9517 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9518 QImode, 1, 0, end_0_label);
9520 if (TARGET_64BIT)
9521 emit_insn (gen_adddi3 (out, out, const1_rtx));
9522 else
9523 emit_insn (gen_addsi3 (out, out, const1_rtx));
9526 /* Generate a loop to check 4 bytes at a time. It is not a good idea
9527 to align this loop; it only enlarges the program and does not
9528 help speed. */
9529 emit_label (align_4_label);
9531 mem = gen_rtx_MEM (SImode, out);
9532 emit_move_insn (scratch, mem);
9533 if (TARGET_64BIT)
9534 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
9535 else
9536 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
9538 /* This formula yields a nonzero result iff one of the bytes is zero.
9539 This saves three branches inside the loop and many cycles. */
9541 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
9542 emit_insn (gen_one_cmplsi2 (scratch, scratch));
9543 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
9544 emit_insn (gen_andsi3 (tmpreg, tmpreg,
9545 GEN_INT (trunc_int_for_mode
9546 (0x80808080, SImode))));
9547 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
9548 SImode, 1, 0, align_4_label);
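/* For instance, scratch = 0x00616263 (zero in the top byte):
(scratch - 0x01010101) & ~scratch & 0x80808080 == 0x80000000, so the
loop exits; for scratch = 0x61626364 the expression is 0 and the
loop continues. */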
9550 if (TARGET_CMOVE)
9552 rtx reg = gen_reg_rtx (SImode);
9553 rtx reg2 = gen_reg_rtx (Pmode);
9554 emit_move_insn (reg, tmpreg);
9555 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
9557 /* If zero is not in the first two bytes, move two bytes forward. */
9558 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9559 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9560 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9561 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
9562 gen_rtx_IF_THEN_ELSE (SImode, tmp,
9563 reg,
9564 tmpreg)));
9565 /* Emit the lea manually to avoid clobbering the flags. */
9566 emit_insn (gen_rtx_SET (SImode, reg2,
9567 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
9569 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9570 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9571 emit_insn (gen_rtx_SET (VOIDmode, out,
9572 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
9573 reg2,
9574 out)));
9577 else
9579 rtx end_2_label = gen_label_rtx ();
9580 /* Is zero in the first two bytes? */
9582 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9583 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9584 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
9585 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9586 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
9587 pc_rtx);
9588 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9589 JUMP_LABEL (tmp) = end_2_label;
9591 /* Not in the first two. Move two bytes forward. */
9592 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
9593 if (TARGET_64BIT)
9594 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
9595 else
9596 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
9598 emit_label (end_2_label);
9602 /* Avoid a branch in fixing the byte. */
9603 tmpreg = gen_lowpart (QImode, tmpreg);
9604 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
9605 if (TARGET_64BIT)
9606 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
9607 else
9608 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
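/* Doubling the low byte of tmpreg moves its top bit - set iff the
first byte of the remaining pair was zero - into the carry flag, so
the subtract-with-borrow rewinds OUT by 4 or 3 bytes and lands
exactly on the zero byte, branch-free. */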
9610 emit_label (end_0_label);
9613 /* Clear stack slot assignments remembered from previous functions.
9614 This is called from INIT_EXPANDERS once before RTL is emitted for each
9615 function. */
9617 static void
9618 ix86_init_machine_status (p)
9619 struct function *p;
9621 p->machine = (struct machine_function *)
9622 xcalloc (1, sizeof (struct machine_function));
9625 /* Mark machine specific bits of P for GC. */
9626 static void
9627 ix86_mark_machine_status (p)
9628 struct function *p;
9630 struct machine_function *machine = p->machine;
9631 enum machine_mode mode;
9632 int n;
9634 if (! machine)
9635 return;
9637 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
9638 mode = (enum machine_mode) ((int) mode + 1))
9639 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
9640 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
9643 static void
9644 ix86_free_machine_status (p)
9645 struct function *p;
9647 free (p->machine);
9648 p->machine = NULL;
9651 /* Return a MEM corresponding to a stack slot with mode MODE.
9652 Allocate a new slot if necessary.
9654 The RTL for a function can have several slots available: N is
9655 which slot to use. */
9658 assign_386_stack_local (mode, n)
9659 enum machine_mode mode;
9660 int n;
9662 if (n < 0 || n >= MAX_386_STACK_LOCALS)
9663 abort ();
9665 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9666 ix86_stack_locals[(int) mode][n]
9667 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9669 return ix86_stack_locals[(int) mode][n];
9672 /* Calculate the length of the memory address in the instruction
9673 encoding. Does not include the one-byte modrm, opcode, or prefix. */
9675 static int
9676 memory_address_length (addr)
9677 rtx addr;
9679 struct ix86_address parts;
9680 rtx base, index, disp;
9681 int len;
9683 if (GET_CODE (addr) == PRE_DEC
9684 || GET_CODE (addr) == POST_INC
9685 || GET_CODE (addr) == PRE_MODIFY
9686 || GET_CODE (addr) == POST_MODIFY)
9687 return 0;
9689 if (! ix86_decompose_address (addr, &parts))
9690 abort ();
9692 base = parts.base;
9693 index = parts.index;
9694 disp = parts.disp;
9695 len = 0;
9697 /* Register Indirect. */
9698 if (base && !index && !disp)
9700 /* Special cases: ebp and esp need the two-byte modrm form. */
9701 if (addr == stack_pointer_rtx
9702 || addr == arg_pointer_rtx
9703 || addr == frame_pointer_rtx
9704 || addr == hard_frame_pointer_rtx)
9705 len = 1;
9708 /* Direct Addressing. */
9709 else if (disp && !base && !index)
9710 len = 4;
9712 else
9714 /* Find the length of the displacement constant. */
9715 if (disp)
9717 if (GET_CODE (disp) == CONST_INT
9718 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
9719 len = 1;
9720 else
9721 len = 4;
9724 /* An index requires the two-byte modrm form. */
9725 if (index)
9726 len += 1;
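/* E.g. 4(%ebx,%esi,2): the displacement fits in a signed byte
(len = 1) and the index forces a SIB byte (len += 1), so we return
2; a bare (%ebx) returns 0. */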
9729 return len;
9732 /* Compute the default value for the "length_immediate" attribute. When
9733 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
9735 ix86_attr_length_immediate_default (insn, shortform)
9736 rtx insn;
9737 int shortform;
9739 int len = 0;
9740 int i;
9741 extract_insn_cached (insn);
9742 for (i = recog_data.n_operands - 1; i >= 0; --i)
9743 if (CONSTANT_P (recog_data.operand[i]))
9745 if (len)
9746 abort ();
9747 if (shortform
9748 && GET_CODE (recog_data.operand[i]) == CONST_INT
9749 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
9750 len = 1;
9751 else
9753 switch (get_attr_mode (insn))
9755 case MODE_QI:
9756 len+=1;
9757 break;
9758 case MODE_HI:
9759 len+=2;
9760 break;
9761 case MODE_SI:
9762 len+=4;
9763 break;
9764 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
9765 case MODE_DI:
9766 len+=4;
9767 break;
9768 default:
9769 fatal_insn ("Unknown insn mode", insn);
9773 return len;
9775 /* Compute default value for "length_address" attribute. */
9777 ix86_attr_length_address_default (insn)
9778 rtx insn;
9780 int i;
9781 extract_insn_cached (insn);
9782 for (i = recog_data.n_operands - 1; i >= 0; --i)
9783 if (GET_CODE (recog_data.operand[i]) == MEM)
9785 return memory_address_length (XEXP (recog_data.operand[i], 0));
9786 break;
9788 return 0;
9791 /* Return the maximum number of instructions a cpu can issue. */
9793 static int
9794 ix86_issue_rate ()
9796 switch (ix86_cpu)
9798 case PROCESSOR_PENTIUM:
9799 case PROCESSOR_K6:
9800 return 2;
9802 case PROCESSOR_PENTIUMPRO:
9803 case PROCESSOR_PENTIUM4:
9804 case PROCESSOR_ATHLON:
9805 return 3;
9807 default:
9808 return 1;
9812 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
9813 by DEP_INSN and nothing else set by DEP_INSN. */
9815 static int
9816 ix86_flags_dependant (insn, dep_insn, insn_type)
9817 rtx insn, dep_insn;
9818 enum attr_type insn_type;
9820 rtx set, set2;
9822 /* Simplify the test for uninteresting insns. */
9823 if (insn_type != TYPE_SETCC
9824 && insn_type != TYPE_ICMOV
9825 && insn_type != TYPE_FCMOV
9826 && insn_type != TYPE_IBR)
9827 return 0;
9829 if ((set = single_set (dep_insn)) != 0)
9831 set = SET_DEST (set);
9832 set2 = NULL_RTX;
9834 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
9835 && XVECLEN (PATTERN (dep_insn), 0) == 2
9836 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
9837 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
9839 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
9840 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
9842 else
9843 return 0;
9845 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
9846 return 0;
9848 /* This test is true if the dependent insn reads the flags but
9849 not any other potentially set register. */
9850 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
9851 return 0;
9853 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
9854 return 0;
9856 return 1;
9859 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
9860 address with operands set by DEP_INSN. */
9862 static int
9863 ix86_agi_dependant (insn, dep_insn, insn_type)
9864 rtx insn, dep_insn;
9865 enum attr_type insn_type;
9867 rtx addr;
9869 if (insn_type == TYPE_LEA
9870 && TARGET_PENTIUM)
9872 addr = PATTERN (insn);
9873 if (GET_CODE (addr) == SET)
9874 ;
9875 else if (GET_CODE (addr) == PARALLEL
9876 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
9877 addr = XVECEXP (addr, 0, 0);
9878 else
9879 abort ();
9880 addr = SET_SRC (addr);
9882 else
9884 int i;
9885 extract_insn_cached (insn);
9886 for (i = recog_data.n_operands - 1; i >= 0; --i)
9887 if (GET_CODE (recog_data.operand[i]) == MEM)
9889 addr = XEXP (recog_data.operand[i], 0);
9890 goto found;
9892 return 0;
9893 found:;
9896 return modified_in_p (addr, dep_insn);
9899 static int
9900 ix86_adjust_cost (insn, link, dep_insn, cost)
9901 rtx insn, link, dep_insn;
9902 int cost;
9904 enum attr_type insn_type, dep_insn_type;
9905 enum attr_memory memory, dep_memory;
9906 rtx set, set2;
9907 int dep_insn_code_number;
9909 /* Anti and output dependencies have zero cost on all CPUs. */
9910 if (REG_NOTE_KIND (link) != 0)
9911 return 0;
9913 dep_insn_code_number = recog_memoized (dep_insn);
9915 /* If we can't recognize the insns, we can't really do anything. */
9916 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
9917 return cost;
9919 insn_type = get_attr_type (insn);
9920 dep_insn_type = get_attr_type (dep_insn);
9922 switch (ix86_cpu)
9924 case PROCESSOR_PENTIUM:
9925 /* Address Generation Interlock adds a cycle of latency. */
9926 if (ix86_agi_dependant (insn, dep_insn, insn_type))
9927 cost += 1;
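/* E.g. addl $4, %eax immediately followed by movl (%eax), %ebx incurs
this AGI stall on the Pentium: address generation needs the %eax
value written in the preceding cycle. */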
9929 /* ??? Compares pair with jump/setcc. */
9930 if (ix86_flags_dependant (insn, dep_insn, insn_type))
9931 cost = 0;
9933 /* Floating point stores require the value to be ready one cycle earlier. */
9934 if (insn_type == TYPE_FMOV
9935 && get_attr_memory (insn) == MEMORY_STORE
9936 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9937 cost += 1;
9938 break;
9940 case PROCESSOR_PENTIUMPRO:
9941 memory = get_attr_memory (insn);
9942 dep_memory = get_attr_memory (dep_insn);
9944 /* Since we can't represent delayed latencies of load+operation,
9945 increase the cost here for non-imov insns. */
9946 if (dep_insn_type != TYPE_IMOV
9947 && dep_insn_type != TYPE_FMOV
9948 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
9949 cost += 1;
9951 /* INT->FP conversion is expensive. */
9952 if (get_attr_fp_int_src (dep_insn))
9953 cost += 5;
9955 /* There is one cycle extra latency between an FP op and a store. */
9956 if (insn_type == TYPE_FMOV
9957 && (set = single_set (dep_insn)) != NULL_RTX
9958 && (set2 = single_set (insn)) != NULL_RTX
9959 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
9960 && GET_CODE (SET_DEST (set2)) == MEM)
9961 cost += 1;
9963 /* Show the ability of the reorder buffer to hide the latency of a load
9964 by executing it in parallel with the previous instruction when the
9965 previous instruction is not needed to compute the address. */
9966 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
9967 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9969 /* Claim moves take one cycle, as the core can issue one load at a
9970 time and the next load can start a cycle later. */
9971 if (dep_insn_type == TYPE_IMOV
9972 || dep_insn_type == TYPE_FMOV)
9973 cost = 1;
9974 else if (cost > 1)
9975 cost--;
9977 break;
9979 case PROCESSOR_K6:
9980 memory = get_attr_memory (insn);
9981 dep_memory = get_attr_memory (dep_insn);
9982 /* The esp dependency is resolved before the instruction is really
9983 finished. */
9984 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
9985 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
9986 return 1;
9988 /* Since we can't represent delayed latencies of load+operation,
9989 increase the cost here for non-imov insns. */
9990 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
9991 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
9993 /* INT->FP conversion is expensive. */
9994 if (get_attr_fp_int_src (dep_insn))
9995 cost += 5;
9997 /* Show the ability of the reorder buffer to hide the latency of a load
9998 by executing it in parallel with the previous instruction when the
9999 previous instruction is not needed to compute the address. */
10000 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10001 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10003 /* Claim moves take one cycle, as the core can issue one load at a
10004 time and the next load can start a cycle later. */
10005 if (dep_insn_type == TYPE_IMOV
10006 || dep_insn_type == TYPE_FMOV)
10007 cost = 1;
10008 else if (cost > 2)
10009 cost -= 2;
10010 else
10011 cost = 1;
10013 break;
10015 case PROCESSOR_ATHLON:
10016 memory = get_attr_memory (insn);
10017 dep_memory = get_attr_memory (dep_insn);
10019 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10021 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10022 cost += 2;
10023 else
10024 cost += 3;
10026 /* Show the ability of the reorder buffer to hide the latency of a load
10027 by executing it in parallel with the previous instruction when the
10028 previous instruction is not needed to compute the address. */
10029 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10030 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10032 /* Claim moves take one cycle, as the core can issue one load at a
10033 time and the next load can start a cycle later. */
10034 if (dep_insn_type == TYPE_IMOV
10035 || dep_insn_type == TYPE_FMOV)
10036 cost = 0;
10037 else if (cost >= 3)
10038 cost -= 3;
10039 else
10040 cost = 0;
10043 default:
10044 break;
10047 return cost;
10050 static union
10052 struct ppro_sched_data
10054 rtx decode[3];
10055 int issued_this_cycle;
10056 } ppro;
10057 } ix86_sched_data;
10059 static int
10060 ix86_safe_length (insn)
10061 rtx insn;
10063 if (recog_memoized (insn) >= 0)
10064 return get_attr_length (insn);
10065 else
10066 return 128;
10069 static int
10070 ix86_safe_length_prefix (insn)
10071 rtx insn;
10073 if (recog_memoized (insn) >= 0)
10074 return get_attr_length (insn);
10075 else
10076 return 0;
10079 static enum attr_memory
10080 ix86_safe_memory (insn)
10081 rtx insn;
10083 if (recog_memoized (insn) >= 0)
10084 return get_attr_memory (insn);
10085 else
10086 return MEMORY_UNKNOWN;
10089 static enum attr_pent_pair
10090 ix86_safe_pent_pair (insn)
10091 rtx insn;
10093 if (recog_memoized (insn) >= 0)
10094 return get_attr_pent_pair (insn);
10095 else
10096 return PENT_PAIR_NP;
10099 static enum attr_ppro_uops
10100 ix86_safe_ppro_uops (insn)
10101 rtx insn;
10103 if (recog_memoized (insn) >= 0)
10104 return get_attr_ppro_uops (insn);
10105 else
10106 return PPRO_UOPS_MANY;
10109 static void
10110 ix86_dump_ppro_packet (dump)
10111 FILE *dump;
10113 if (ix86_sched_data.ppro.decode[0])
10115 fprintf (dump, "PPRO packet: %d",
10116 INSN_UID (ix86_sched_data.ppro.decode[0]));
10117 if (ix86_sched_data.ppro.decode[1])
10118 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10119 if (ix86_sched_data.ppro.decode[2])
10120 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10121 fputc ('\n', dump);
10125 /* We're beginning a new block. Initialize data structures as necessary. */
10127 static void
10128 ix86_sched_init (dump, sched_verbose, veclen)
10129 FILE *dump ATTRIBUTE_UNUSED;
10130 int sched_verbose ATTRIBUTE_UNUSED;
10131 int veclen ATTRIBUTE_UNUSED;
10133 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10136 /* Shift INSN to SLOT, and shift everything else down. */
10138 static void
10139 ix86_reorder_insn (insnp, slot)
10140 rtx *insnp, *slot;
10142 if (insnp != slot)
10144 rtx insn = *insnp;
10146 insnp[0] = insnp[1];
10147 while (++insnp != slot);
10148 *insnp = insn;
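/* A worked illustration (not in the original source): with the ready
   vector {A, B, C, D} in increasing priority, the call
   ix86_reorder_insn (&ready[1], &ready[3]) moves B into slot 3 and
   shifts C and D down one slot each, yielding {A, C, D, B}; the
   relative order of the untouched insns is preserved.  */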
10152 /* Find an instruction with the given pairability that minimizes the
10153 number of cycles lost to the fact that the CPU waits for both pipelines
10154 to finish before reading the next instructions.  Also take care that the
10155 two instructions together do not exceed 7 bytes.  */
10157 static rtx *
10158 ix86_pent_find_pair (e_ready, ready, type, first)
10159 rtx *e_ready;
10160 rtx *ready;
10161 enum attr_pent_pair type;
10162 rtx first;
10164 int mincycles, cycles;
10165 enum attr_pent_pair tmp;
10166 enum attr_memory memory;
10167 rtx *insnp, *bestinsnp = NULL;
10169 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
10170 return NULL;
10172 memory = ix86_safe_memory (first);
10173 cycles = result_ready_cost (first);
10174 mincycles = INT_MAX;
10176 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
10177 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
10178 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
10180 enum attr_memory second_memory;
10181 int secondcycles, currentcycles;
10183 second_memory = ix86_safe_memory (*insnp);
10184 secondcycles = result_ready_cost (*insnp);
10185 currentcycles = abs (cycles - secondcycles);
10187 if (secondcycles >= 1 && cycles >= 1)
10189 /* Two read/modify/write instructions together take two
10190 cycles longer.  */
10191 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
10192 currentcycles += 2;
10194 /* A read/modify/write instruction followed by a read/modify
10195 instruction takes one cycle longer.  */
10196 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
10197 && tmp != PENT_PAIR_UV
10198 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
10199 currentcycles += 1;
10201 if (currentcycles < mincycles)
10202 bestinsnp = insnp, mincycles = currentcycles;
10205 return bestinsnp;
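/* A worked example of the cost above (illustration only): pairing a
   3-cycle read/modify/write insn with a 1-cycle load candidate gives
   currentcycles = |3 - 1| = 2 for the pipeline imbalance, plus 1 more
   when neither insn is UV pairable (the read/modify/write followed by
   read/modify penalty), for 3 lost cycles in total; the scan keeps the
   candidate minimizing that total.  */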
10208 /* Subroutines of ix86_sched_reorder. */
10210 static void
10211 ix86_sched_reorder_pentium (ready, e_ready)
10212 rtx *ready;
10213 rtx *e_ready;
10215 enum attr_pent_pair pair1, pair2;
10216 rtx *insnp;
10218 /* This wouldn't be necessary if Haifa knew that static insn ordering
10219 determines which pipe an insn is issued to.  So we have to make
10220 some minor rearrangements.  */
10222 pair1 = ix86_safe_pent_pair (*e_ready);
10224 /* If the first insn is non-pairable, let it be. */
10225 if (pair1 == PENT_PAIR_NP)
10226 return;
10228 pair2 = PENT_PAIR_NP;
10229 insnp = 0;
10231 /* If the first insn is UV or PV pairable, search for a PU
10232 insn to pair with it.  */
10233 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
10235 insnp = ix86_pent_find_pair (e_ready-1, ready,
10236 PENT_PAIR_PU, *e_ready);
10237 if (insnp)
10238 pair2 = PENT_PAIR_PU;
10241 /* If the first insn is PU or UV pairable, search for a PV
10242 insn to pair with it.  */
10243 if (pair2 == PENT_PAIR_NP
10244 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
10246 insnp = ix86_pent_find_pair (e_ready-1, ready,
10247 PENT_PAIR_PV, *e_ready);
10248 if (insnp)
10249 pair2 = PENT_PAIR_PV;
10252 /* If the first insn is pairable, search for a UV
10253 insn to pair with it.  */
10254 if (pair2 == PENT_PAIR_NP)
10256 insnp = ix86_pent_find_pair (e_ready-1, ready,
10257 PENT_PAIR_UV, *e_ready);
10258 if (insnp)
10259 pair2 = PENT_PAIR_UV;
10262 if (pair2 == PENT_PAIR_NP)
10263 return;
10265 /* Found something! Decide if we need to swap the order. */
10266 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
10267 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
10268 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
10269 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
10270 ix86_reorder_insn (insnp, e_ready);
10271 else
10272 ix86_reorder_insn (insnp, e_ready - 1);
10275 static void
10276 ix86_sched_reorder_ppro (ready, e_ready)
10277 rtx *ready;
10278 rtx *e_ready;
10280 rtx decode[3];
10281 enum attr_ppro_uops cur_uops;
10282 int issued_this_cycle;
10283 rtx *insnp;
10284 int i;
10286 /* At this point .ppro.decode contains the state of the three
10287 decoders from last "cycle". That is, those insns that were
10288 actually independent. But here we're scheduling for the
10289 decoder, and we may find things that are decodable in the
10290 same cycle. */
10292 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
10293 issued_this_cycle = 0;
10295 insnp = e_ready;
10296 cur_uops = ix86_safe_ppro_uops (*insnp);
10298 /* If the decoders are empty, and we have a complex insn at the
10299 head of the priority queue, let it issue without complaint.  */
10300 if (decode[0] == NULL)
10302 if (cur_uops == PPRO_UOPS_MANY)
10304 decode[0] = *insnp;
10305 goto ppro_done;
10308 /* Otherwise, search for a 2-4 uop insn to issue.  */
10309 while (cur_uops != PPRO_UOPS_FEW)
10311 if (insnp == ready)
10312 break;
10313 cur_uops = ix86_safe_ppro_uops (*--insnp);
10316 /* If so, move it to the head of the line. */
10317 if (cur_uops == PPRO_UOPS_FEW)
10318 ix86_reorder_insn (insnp, e_ready);
10320 /* Issue the head of the queue. */
10321 issued_this_cycle = 1;
10322 decode[0] = *e_ready--;
10325 /* Look for simple insns to fill in the other two slots. */
10326 for (i = 1; i < 3; ++i)
10327 if (decode[i] == NULL)
10329 if (ready >= e_ready)
10330 goto ppro_done;
10332 insnp = e_ready;
10333 cur_uops = ix86_safe_ppro_uops (*insnp);
10334 while (cur_uops != PPRO_UOPS_ONE)
10336 if (insnp == ready)
10337 break;
10338 cur_uops = ix86_safe_ppro_uops (*--insnp);
10341 /* Found one. Move it to the head of the queue and issue it. */
10342 if (cur_uops == PPRO_UOPS_ONE)
10344 ix86_reorder_insn (insnp, e_ready);
10345 decode[i] = *e_ready--;
10346 issued_this_cycle++;
10347 continue;
10350 /* ??? Didn't find one. Ideally, here we would do a lazy split
10351 of 2-uop insns, issue one and queue the other. */
10354 ppro_done:
10355 if (issued_this_cycle == 0)
10356 issued_this_cycle = 1;
10357 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
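/* A sketch of the 4-1-1 decode template being modeled here (assumed
   from the PPro documentation, not stated in this file): decoder 0
   handles insns of up to 4 uops while decoders 1 and 2 take one uop
   each, so an ideal decode group looks like

       addl (%eax), %ebx	# 2 uops -> decoder 0
       incl %ecx		# 1 uop  -> decoder 1
       movl %edx, %esi		# 1 uop  -> decoder 2

   which is why the code above issues one FEW/MANY insn first and then
   fills the remaining two slots with single-uop insns.  */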
10360 /* We are about to begin issuing insns for this clock cycle.
10361 Override the default sort algorithm to better slot instructions. */
10362 static int
10363 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
10364 FILE *dump ATTRIBUTE_UNUSED;
10365 int sched_verbose ATTRIBUTE_UNUSED;
10366 rtx *ready;
10367 int *n_readyp;
10368 int clock_var ATTRIBUTE_UNUSED;
10370 int n_ready = *n_readyp;
10371 rtx *e_ready = ready + n_ready - 1;
10373 if (n_ready < 2)
10374 goto out;
10376 switch (ix86_cpu)
10378 default:
10379 break;
10381 case PROCESSOR_PENTIUM:
10382 ix86_sched_reorder_pentium (ready, e_ready);
10383 break;
10385 case PROCESSOR_PENTIUMPRO:
10386 ix86_sched_reorder_ppro (ready, e_ready);
10387 break;
10390 out:
10391 return ix86_issue_rate ();
10394 /* We are about to issue INSN. Return the number of insns left on the
10395 ready queue that can be issued this cycle. */
10397 static int
10398 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
10399 FILE *dump;
10400 int sched_verbose;
10401 rtx insn;
10402 int can_issue_more;
10404 int i;
10405 switch (ix86_cpu)
10407 default:
10408 return can_issue_more - 1;
10410 case PROCESSOR_PENTIUMPRO:
10412 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
10414 if (uops == PPRO_UOPS_MANY)
10416 if (sched_verbose)
10417 ix86_dump_ppro_packet (dump);
10418 ix86_sched_data.ppro.decode[0] = insn;
10419 ix86_sched_data.ppro.decode[1] = NULL;
10420 ix86_sched_data.ppro.decode[2] = NULL;
10421 if (sched_verbose)
10422 ix86_dump_ppro_packet (dump);
10423 ix86_sched_data.ppro.decode[0] = NULL;
10425 else if (uops == PPRO_UOPS_FEW)
10427 if (sched_verbose)
10428 ix86_dump_ppro_packet (dump);
10429 ix86_sched_data.ppro.decode[0] = insn;
10430 ix86_sched_data.ppro.decode[1] = NULL;
10431 ix86_sched_data.ppro.decode[2] = NULL;
10433 else
10435 for (i = 0; i < 3; ++i)
10436 if (ix86_sched_data.ppro.decode[i] == NULL)
10438 ix86_sched_data.ppro.decode[i] = insn;
10439 break;
10441 if (i == 3)
10442 abort ();
10443 if (i == 2)
10445 if (sched_verbose)
10446 ix86_dump_ppro_packet (dump);
10447 ix86_sched_data.ppro.decode[0] = NULL;
10448 ix86_sched_data.ppro.decode[1] = NULL;
10449 ix86_sched_data.ppro.decode[2] = NULL;
10453 return --ix86_sched_data.ppro.issued_this_cycle;
10457 /* Walk through INSNS and look for MEM references whose address is DSTREG or
10458 SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
10459 appropriate.  */
10461 void
10462 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
10463 rtx insns;
10464 rtx dstref, srcref, dstreg, srcreg;
10466 rtx insn;
10468 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
10469 if (INSN_P (insn))
10470 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
10471 dstreg, srcreg);
10474 /* Subroutine of above to actually do the updating by recursively walking
10475 the rtx. */
10477 static void
10478 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
10479 rtx x;
10480 rtx dstref, srcref, dstreg, srcreg;
10482 enum rtx_code code = GET_CODE (x);
10483 const char *format_ptr = GET_RTX_FORMAT (code);
10484 int i, j;
10486 if (code == MEM && XEXP (x, 0) == dstreg)
10487 MEM_COPY_ATTRIBUTES (x, dstref);
10488 else if (code == MEM && XEXP (x, 0) == srcreg)
10489 MEM_COPY_ATTRIBUTES (x, srcref);
10491 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
10493 if (*format_ptr == 'e')
10494 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
10495 dstreg, srcreg);
10496 else if (*format_ptr == 'E')
10497 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10498 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
10499 dstreg, srcreg);
10503 /* Compute the alignment given to a constant that is being placed in memory.
10504 EXP is the constant and ALIGN is the alignment that the object would
10505 ordinarily have.
10506 The value of this function is used instead of that alignment to align
10507 the object. */
10510 ix86_constant_alignment (exp, align)
10511 tree exp;
10512 int align;
10514 if (TREE_CODE (exp) == REAL_CST)
10516 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
10517 return 64;
10518 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
10519 return 128;
10521 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
10522 && align < 256)
10523 return 256;
10525 return align;
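/* For instance (an illustrative sketch): given

       static const double pi = 3.14159265358979323846;

   the DFmode constant would ordinarily get only 32-bit alignment on
   ia32; the test above raises it to 64 bits so an fld from it cannot
   straddle a cache line, and string constants of 31 bytes or more are
   raised to 256-bit alignment for the benefit of block operations.  */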
10528 /* Compute the alignment for a static variable.
10529 TYPE is the data type, and ALIGN is the alignment that
10530 the object would ordinarily have. The value of this function is used
10531 instead of that alignment to align the object. */
10534 ix86_data_alignment (type, align)
10535 tree type;
10536 int align;
10538 if (AGGREGATE_TYPE_P (type)
10539 && TYPE_SIZE (type)
10540 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10541 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
10542 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
10543 return 256;
10545 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
10546 to a 16-byte boundary.  */
10547 if (TARGET_64BIT)
10549 if (AGGREGATE_TYPE_P (type)
10550 && TYPE_SIZE (type)
10551 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10552 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
10553 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10554 return 128;
10557 if (TREE_CODE (type) == ARRAY_TYPE)
10559 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10560 return 64;
10561 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10562 return 128;
10564 else if (TREE_CODE (type) == COMPLEX_TYPE)
10567 if (TYPE_MODE (type) == DCmode && align < 64)
10568 return 64;
10569 if (TYPE_MODE (type) == XCmode && align < 128)
10570 return 128;
10572 else if ((TREE_CODE (type) == RECORD_TYPE
10573 || TREE_CODE (type) == UNION_TYPE
10574 || TREE_CODE (type) == QUAL_UNION_TYPE)
10575 && TYPE_FIELDS (type))
10577 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10578 return 64;
10579 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10580 return 128;
10582 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10583 || TREE_CODE (type) == INTEGER_TYPE)
10585 if (TYPE_MODE (type) == DFmode && align < 64)
10586 return 64;
10587 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10588 return 128;
10591 return align;
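/* Illustrative examples of the rules above (not from the original
   source): "static char buf[64];" is a 512-bit aggregate, so it is
   raised to 256-bit (32-byte) alignment and memset/memcpy expansions
   can use aligned wide moves; "static double d[4];" as an array of
   DFmode elements gets at least 64-bit alignment; and on x86-64 any
   aggregate of 16 bytes or more is kept on a 16-byte boundary as the
   ABI requires.  */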
10594 /* Compute the alignment for a local variable.
10595 TYPE is the data type, and ALIGN is the alignment that
10596 the object would ordinarily have.  The value of this function is used
10597 instead of that alignment to align the object.  */
10600 ix86_local_alignment (type, align)
10601 tree type;
10602 int align;
10604 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
10605 to a 16-byte boundary.  */
10606 if (TARGET_64BIT)
10608 if (AGGREGATE_TYPE_P (type)
10609 && TYPE_SIZE (type)
10610 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10611 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
10612 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10613 return 128;
10615 if (TREE_CODE (type) == ARRAY_TYPE)
10617 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10618 return 64;
10619 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10620 return 128;
10622 else if (TREE_CODE (type) == COMPLEX_TYPE)
10624 if (TYPE_MODE (type) == DCmode && align < 64)
10625 return 64;
10626 if (TYPE_MODE (type) == XCmode && align < 128)
10627 return 128;
10629 else if ((TREE_CODE (type) == RECORD_TYPE
10630 || TREE_CODE (type) == UNION_TYPE
10631 || TREE_CODE (type) == QUAL_UNION_TYPE)
10632 && TYPE_FIELDS (type))
10634 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10635 return 64;
10636 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10637 return 128;
10639 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10640 || TREE_CODE (type) == INTEGER_TYPE)
10643 if (TYPE_MODE (type) == DFmode && align < 64)
10644 return 64;
10645 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10646 return 128;
10648 return align;
10651 /* Emit RTL insns to initialize the variable parts of a trampoline.
10652 FNADDR is an RTX for the address of the function's pure code.
10653 CXT is an RTX for the static chain value for the function. */
10654 void
10655 x86_initialize_trampoline (tramp, fnaddr, cxt)
10656 rtx tramp, fnaddr, cxt;
10658 if (!TARGET_64BIT)
10660 /* Compute offset from the end of the jmp to the target function. */
10661 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
10662 plus_constant (tramp, 10),
10663 NULL_RTX, 1, OPTAB_DIRECT);
10664 emit_move_insn (gen_rtx_MEM (QImode, tramp),
10665 GEN_INT (trunc_int_for_mode (0xb9, QImode)));
10666 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
10667 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
10668 GEN_INT (trunc_int_for_mode (0xe9, QImode)));
10669 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
10671 else
10673 int offset = 0;
10674 /* Try to load the address using the shorter movl instead of movabs.
10675 We may want to support movq for kernel mode, but the kernel does not
10676 use trampolines at the moment.  */
10677 if (x86_64_zero_extended_value (fnaddr))
10679 fnaddr = copy_to_mode_reg (DImode, fnaddr);
10680 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10681 GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
10682 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
10683 gen_lowpart (SImode, fnaddr));
10684 offset += 6;
10686 else
10688 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10689 GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
10690 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10691 fnaddr);
10692 offset += 10;
10694 /* Load static chain using movabs to r10. */
10695 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10696 GEN_INT (trunc_int_for_mode (0xba49, HImode)));
10697 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10698 cxt);
10699 offset += 10;
10700 /* Jump to r11.  */
10701 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10702 GEN_INT (trunc_int_for_mode (0xff49, HImode)));
10703 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
10704 GEN_INT (trunc_int_for_mode (0xe3, QImode)));
10705 offset += 3;
10706 if (offset > TRAMPOLINE_SIZE)
10707 abort ();
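/* For reference (an illustrative decoding of the bytes emitted above;
   the immediates and displacement are filled in at run time):

       32-bit:	b9 <cxt:4>		movl   $cxt, %ecx
		e9 <disp:4>		jmp    fnaddr

       64-bit:	41 bb <fnaddr:4>	movl   $fnaddr, %r11d  (short form)
	 or	49 bb <fnaddr:8>	movabs $fnaddr, %r11
		49 ba <cxt:8>		movabs $cxt, %r10
		49 ff e3		jmp    *%r11  */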
10711 #define def_builtin(MASK, NAME, TYPE, CODE) \
10712 do { \
10713 if ((MASK) & target_flags) \
10714 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
10715 } while (0)
10717 struct builtin_description
10719 const unsigned int mask;
10720 const enum insn_code icode;
10721 const char *const name;
10722 const enum ix86_builtins code;
10723 const enum rtx_code comparison;
10724 const unsigned int flag;
10727 static const struct builtin_description bdesc_comi[] =
10729 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
10730 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
10731 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
10732 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
10733 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
10734 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
10735 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
10736 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
10737 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
10738 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
10739 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
10740 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
10743 static const struct builtin_description bdesc_2arg[] =
10745 /* SSE */
10746 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
10747 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
10748 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
10749 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
10750 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
10751 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
10752 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
10753 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
10755 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
10756 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
10757 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
10758 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
10759 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
10760 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
10761 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
10762 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
10763 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
10764 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
10765 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
10766 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
10767 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
10768 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
10769 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
10770 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
10771 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
10772 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
10773 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
10774 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
10775 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
10776 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
10777 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
10778 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
10780 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
10781 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
10782 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
10783 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
10785 { MASK_SSE, CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
10786 { MASK_SSE, CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
10787 { MASK_SSE, CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
10788 { MASK_SSE, CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
10790 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
10791 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
10792 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
10793 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
10794 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
10796 /* MMX */
10797 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
10798 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
10799 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
10800 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
10801 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
10802 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
10804 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
10805 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
10806 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
10807 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
10808 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
10809 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
10810 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
10811 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
10813 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
10814 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
10815 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
10817 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
10818 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
10819 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
10820 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
10822 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
10823 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
10825 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
10826 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
10827 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
10828 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
10829 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
10830 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
10832 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
10833 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
10834 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
10835 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
10837 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
10838 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
10839 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
10840 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
10841 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
10842 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
10844 /* Special. */
10845 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
10846 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
10847 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
10849 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
10850 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
10852 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
10853 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
10854 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
10855 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
10856 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
10857 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
10859 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
10860 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
10861 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
10862 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
10863 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
10864 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
10866 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
10867 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
10868 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
10869 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
10871 { MASK_SSE, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
10872 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
10876 static const struct builtin_description bdesc_1arg[] =
10878 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
10879 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
10881 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
10882 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
10883 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
10885 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
10886 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
10887 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
10888 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
10892 void
10893 ix86_init_builtins ()
10895 if (TARGET_MMX)
10896 ix86_init_mmx_sse_builtins ();
10899 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
10900 is zero.  Otherwise, if TARGET_SSE is not set, only the MMX
10901 builtins are defined.  */
10902 void
10903 ix86_init_mmx_sse_builtins ()
10905 const struct builtin_description * d;
10906 size_t i;
10907 tree endlink = void_list_node;
10909 tree pchar_type_node = build_pointer_type (char_type_node);
10910 tree pfloat_type_node = build_pointer_type (float_type_node);
10911 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
10912 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
10914 /* Comparisons. */
10915 tree int_ftype_v4sf_v4sf
10916 = build_function_type (integer_type_node,
10917 tree_cons (NULL_TREE, V4SF_type_node,
10918 tree_cons (NULL_TREE,
10919 V4SF_type_node,
10920 endlink)));
10921 tree v4si_ftype_v4sf_v4sf
10922 = build_function_type (V4SI_type_node,
10923 tree_cons (NULL_TREE, V4SF_type_node,
10924 tree_cons (NULL_TREE,
10925 V4SF_type_node,
10926 endlink)));
10927 /* MMX/SSE/integer conversions. */
10928 tree int_ftype_v4sf
10929 = build_function_type (integer_type_node,
10930 tree_cons (NULL_TREE, V4SF_type_node,
10931 endlink));
10932 tree int_ftype_v8qi
10933 = build_function_type (integer_type_node,
10934 tree_cons (NULL_TREE, V8QI_type_node,
10935 endlink));
10936 tree int_ftype_v2si
10937 = build_function_type (integer_type_node,
10938 tree_cons (NULL_TREE, V2SI_type_node,
10939 endlink));
10940 tree v2si_ftype_int
10941 = build_function_type (V2SI_type_node,
10942 tree_cons (NULL_TREE, integer_type_node,
10943 endlink));
10944 tree v4sf_ftype_v4sf_int
10945 = build_function_type (V4SF_type_node,
10946 tree_cons (NULL_TREE, V4SF_type_node,
10947 tree_cons (NULL_TREE, integer_type_node,
10948 endlink)));
10949 tree v4sf_ftype_v4sf_v2si
10950 = build_function_type (V4SF_type_node,
10951 tree_cons (NULL_TREE, V4SF_type_node,
10952 tree_cons (NULL_TREE, V2SI_type_node,
10953 endlink)));
10954 tree int_ftype_v4hi_int
10955 = build_function_type (integer_type_node,
10956 tree_cons (NULL_TREE, V4HI_type_node,
10957 tree_cons (NULL_TREE, integer_type_node,
10958 endlink)));
10959 tree v4hi_ftype_v4hi_int_int
10960 = build_function_type (V4HI_type_node,
10961 tree_cons (NULL_TREE, V4HI_type_node,
10962 tree_cons (NULL_TREE, integer_type_node,
10963 tree_cons (NULL_TREE,
10964 integer_type_node,
10965 endlink))));
10966 /* Miscellaneous. */
10967 tree v8qi_ftype_v4hi_v4hi
10968 = build_function_type (V8QI_type_node,
10969 tree_cons (NULL_TREE, V4HI_type_node,
10970 tree_cons (NULL_TREE, V4HI_type_node,
10971 endlink)));
10972 tree v4hi_ftype_v2si_v2si
10973 = build_function_type (V4HI_type_node,
10974 tree_cons (NULL_TREE, V2SI_type_node,
10975 tree_cons (NULL_TREE, V2SI_type_node,
10976 endlink)));
10977 tree v4sf_ftype_v4sf_v4sf_int
10978 = build_function_type (V4SF_type_node,
10979 tree_cons (NULL_TREE, V4SF_type_node,
10980 tree_cons (NULL_TREE, V4SF_type_node,
10981 tree_cons (NULL_TREE,
10982 integer_type_node,
10983 endlink))));
10984 tree v4hi_ftype_v8qi_v8qi
10985 = build_function_type (V4HI_type_node,
10986 tree_cons (NULL_TREE, V8QI_type_node,
10987 tree_cons (NULL_TREE, V8QI_type_node,
10988 endlink)));
10989 tree v2si_ftype_v4hi_v4hi
10990 = build_function_type (V2SI_type_node,
10991 tree_cons (NULL_TREE, V4HI_type_node,
10992 tree_cons (NULL_TREE, V4HI_type_node,
10993 endlink)));
10994 tree v4hi_ftype_v4hi_int
10995 = build_function_type (V4HI_type_node,
10996 tree_cons (NULL_TREE, V4HI_type_node,
10997 tree_cons (NULL_TREE, integer_type_node,
10998 endlink)));
10999 tree v4hi_ftype_v4hi_di
11000 = build_function_type (V4HI_type_node,
11001 tree_cons (NULL_TREE, V4HI_type_node,
11002 tree_cons (NULL_TREE,
11003 long_long_integer_type_node,
11004 endlink)));
11005 tree v2si_ftype_v2si_di
11006 = build_function_type (V2SI_type_node,
11007 tree_cons (NULL_TREE, V2SI_type_node,
11008 tree_cons (NULL_TREE,
11009 long_long_integer_type_node,
11010 endlink)));
11011 tree void_ftype_void
11012 = build_function_type (void_type_node, endlink);
11013 tree void_ftype_pchar_int
11014 = build_function_type (void_type_node,
11015 tree_cons (NULL_TREE, pchar_type_node,
11016 tree_cons (NULL_TREE, integer_type_node,
11017 endlink)));
11018 tree void_ftype_unsigned
11019 = build_function_type (void_type_node,
11020 tree_cons (NULL_TREE, unsigned_type_node,
11021 endlink));
11022 tree unsigned_ftype_void
11023 = build_function_type (unsigned_type_node, endlink);
11024 tree di_ftype_void
11025 = build_function_type (long_long_unsigned_type_node, endlink);
11026 tree ti_ftype_void
11027 = build_function_type (intTI_type_node, endlink);
11028 tree v2si_ftype_v4sf
11029 = build_function_type (V2SI_type_node,
11030 tree_cons (NULL_TREE, V4SF_type_node,
11031 endlink));
11032 /* Loads/stores. */
11033 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
11034 tree_cons (NULL_TREE, V8QI_type_node,
11035 tree_cons (NULL_TREE,
11036 pchar_type_node,
11037 endlink)));
11038 tree void_ftype_v8qi_v8qi_pchar
11039 = build_function_type (void_type_node, maskmovq_args);
11040 tree v4sf_ftype_pfloat
11041 = build_function_type (V4SF_type_node,
11042 tree_cons (NULL_TREE, pfloat_type_node,
11043 endlink));
11044 tree v4sf_ftype_float
11045 = build_function_type (V4SF_type_node,
11046 tree_cons (NULL_TREE, float_type_node,
11047 endlink));
11048 tree v4sf_ftype_float_float_float_float
11049 = build_function_type (V4SF_type_node,
11050 tree_cons (NULL_TREE, float_type_node,
11051 tree_cons (NULL_TREE, float_type_node,
11052 tree_cons (NULL_TREE,
11053 float_type_node,
11054 tree_cons (NULL_TREE,
11055 float_type_node,
11056 endlink)))));
11057 /* @@@ the type is bogus */
11058 tree v4sf_ftype_v4sf_pv2si
11059 = build_function_type (V4SF_type_node,
11060 tree_cons (NULL_TREE, V4SF_type_node,
11061 tree_cons (NULL_TREE, pv2si_type_node,
11062 endlink)));
11063 tree void_ftype_pv2si_v4sf
11064 = build_function_type (void_type_node,
11065 tree_cons (NULL_TREE, pv2si_type_node,
11066 tree_cons (NULL_TREE, V4SF_type_node,
11067 endlink)));
11068 tree void_ftype_pfloat_v4sf
11069 = build_function_type (void_type_node,
11070 tree_cons (NULL_TREE, pfloat_type_node,
11071 tree_cons (NULL_TREE, V4SF_type_node,
11072 endlink)));
11073 tree void_ftype_pdi_di
11074 = build_function_type (void_type_node,
11075 tree_cons (NULL_TREE, pdi_type_node,
11076 tree_cons (NULL_TREE,
11077 long_long_unsigned_type_node,
11078 endlink)));
11079 /* Normal vector unops. */
11080 tree v4sf_ftype_v4sf
11081 = build_function_type (V4SF_type_node,
11082 tree_cons (NULL_TREE, V4SF_type_node,
11083 endlink));
11085 /* Normal vector binops. */
11086 tree v4sf_ftype_v4sf_v4sf
11087 = build_function_type (V4SF_type_node,
11088 tree_cons (NULL_TREE, V4SF_type_node,
11089 tree_cons (NULL_TREE, V4SF_type_node,
11090 endlink)));
11091 tree v8qi_ftype_v8qi_v8qi
11092 = build_function_type (V8QI_type_node,
11093 tree_cons (NULL_TREE, V8QI_type_node,
11094 tree_cons (NULL_TREE, V8QI_type_node,
11095 endlink)));
11096 tree v4hi_ftype_v4hi_v4hi
11097 = build_function_type (V4HI_type_node,
11098 tree_cons (NULL_TREE, V4HI_type_node,
11099 tree_cons (NULL_TREE, V4HI_type_node,
11100 endlink)));
11101 tree v2si_ftype_v2si_v2si
11102 = build_function_type (V2SI_type_node,
11103 tree_cons (NULL_TREE, V2SI_type_node,
11104 tree_cons (NULL_TREE, V2SI_type_node,
11105 endlink)));
11106 tree ti_ftype_ti_ti
11107 = build_function_type (intTI_type_node,
11108 tree_cons (NULL_TREE, intTI_type_node,
11109 tree_cons (NULL_TREE, intTI_type_node,
11110 endlink)));
11111 tree di_ftype_di_di
11112 = build_function_type (long_long_unsigned_type_node,
11113 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11114 tree_cons (NULL_TREE,
11115 long_long_unsigned_type_node,
11116 endlink)));
11118 tree v2si_ftype_v2sf
11119 = build_function_type (V2SI_type_node,
11120 tree_cons (NULL_TREE, V2SF_type_node,
11121 endlink));
11122 tree v2sf_ftype_v2si
11123 = build_function_type (V2SF_type_node,
11124 tree_cons (NULL_TREE, V2SI_type_node,
11125 endlink));
11126 tree v2si_ftype_v2si
11127 = build_function_type (V2SI_type_node,
11128 tree_cons (NULL_TREE, V2SI_type_node,
11129 endlink));
11130 tree v2sf_ftype_v2sf
11131 = build_function_type (V2SF_type_node,
11132 tree_cons (NULL_TREE, V2SF_type_node,
11133 endlink));
11134 tree v2sf_ftype_v2sf_v2sf
11135 = build_function_type (V2SF_type_node,
11136 tree_cons (NULL_TREE, V2SF_type_node,
11137 tree_cons (NULL_TREE,
11138 V2SF_type_node,
11139 endlink)));
11140 tree v2si_ftype_v2sf_v2sf
11141 = build_function_type (V2SI_type_node,
11142 tree_cons (NULL_TREE, V2SF_type_node,
11143 tree_cons (NULL_TREE,
11144 V2SF_type_node,
11145 endlink)));
11147 tree void_ftype_pchar
11148 = build_function_type (void_type_node,
11149 tree_cons (NULL_TREE, pchar_type_node,
11150 endlink));
11152 /* Add all builtins that are more or less simple operations on two
11153 operands. */
11154 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
11156 /* Use one of the operands; the target can have a different mode for
11157 mask-generating compares. */
11158 enum machine_mode mode;
11159 tree type;
11161 if (d->name == 0)
11162 continue;
11163 mode = insn_data[d->icode].operand[1].mode;
11165 switch (mode)
11167 case V4SFmode:
11168 type = v4sf_ftype_v4sf_v4sf;
11169 break;
11170 case V8QImode:
11171 type = v8qi_ftype_v8qi_v8qi;
11172 break;
11173 case V4HImode:
11174 type = v4hi_ftype_v4hi_v4hi;
11175 break;
11176 case V2SImode:
11177 type = v2si_ftype_v2si_v2si;
11178 break;
11179 case TImode:
11180 type = ti_ftype_ti_ti;
11181 break;
11182 case DImode:
11183 type = di_ftype_di_di;
11184 break;
11186 default:
11187 abort ();
11190 /* Override for comparisons. */
11191 if (d->icode == CODE_FOR_maskcmpv4sf3
11192 || d->icode == CODE_FOR_maskncmpv4sf3
11193 || d->icode == CODE_FOR_vmmaskcmpv4sf3
11194 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11195 type = v4si_ftype_v4sf_v4sf;
11197 def_builtin (d->mask, d->name, type, d->code);
11200 /* Add the remaining MMX insns with somewhat more complicated types. */
11201 def_builtin (MASK_MMX, "__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
11202 def_builtin (MASK_MMX, "__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
11203 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
11204 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
11205 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
11206 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
11207 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
11208 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
11209 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
11211 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
11212 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
11213 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
11215 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
11216 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
11218 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
11219 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
11221 /* comi/ucomi insns. */
11222 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
11223 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
11225 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
11226 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
11227 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
11229 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
11230 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
11231 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
11232 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
11233 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
11234 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
11236 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
11237 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
11239 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
11241 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
11242 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
11243 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
11244 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
11245 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
11246 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
11248 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
11249 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
11250 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
11251 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
11253 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
11254 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
11255 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
11256 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
11258 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
11259 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
11261 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
11263 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
11264 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
11265 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
11266 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
11267 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
11268 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
11270 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
11272 /* Original 3DNow! */
11273 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
11274 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
11275 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
11276 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
11277 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
11278 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
11279 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
11280 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
11281 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
11282 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
11283 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
11284 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
11285 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
11286 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
11287 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
11288 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
11289 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
11290 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
11291 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
11292 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
11293 def_builtin (MASK_3DNOW, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar, IX86_BUILTIN_PREFETCH_3DNOW);
11294 def_builtin (MASK_3DNOW, "__builtin_ia32_prefetchw", void_ftype_pchar, IX86_BUILTIN_PREFETCHW);
11296 /* 3DNow! extension as used in the Athlon CPU. */
11297 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
11298 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
11299 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
11300 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
11301 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
11302 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
11304 /* Composite intrinsics. */
11305 def_builtin (MASK_SSE, "__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
11306 def_builtin (MASK_SSE, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
11307 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
11308 def_builtin (MASK_SSE, "__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
11309 def_builtin (MASK_SSE, "__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
11310 def_builtin (MASK_SSE, "__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
11311 def_builtin (MASK_SSE, "__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
11314 /* Errors in the source file can cause expand_expr to return const0_rtx
11315 where we expect a vector. To avoid crashing, use one of the vector
11316 clear instructions. */
11317 static rtx
11318 safe_vector_operand (x, mode)
11319 rtx x;
11320 enum machine_mode mode;
11322 if (x != const0_rtx)
11323 return x;
11324 x = gen_reg_rtx (mode);
11326 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
11327 emit_insn (gen_mmx_clrdi (mode == DImode ? x
11328 : gen_rtx_SUBREG (DImode, x, 0)));
11329 else
11330 emit_insn (gen_sse_clrti (mode == TImode ? x
11331 : gen_rtx_SUBREG (TImode, x, 0)));
11332 return x;
11335 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
11337 static rtx
11338 ix86_expand_binop_builtin (icode, arglist, target)
11339 enum insn_code icode;
11340 tree arglist;
11341 rtx target;
11343 rtx pat;
11344 tree arg0 = TREE_VALUE (arglist);
11345 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11346 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11347 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11348 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11349 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11350 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11352 if (VECTOR_MODE_P (mode0))
11353 op0 = safe_vector_operand (op0, mode0);
11354 if (VECTOR_MODE_P (mode1))
11355 op1 = safe_vector_operand (op1, mode1);
11357 if (! target
11358 || GET_MODE (target) != tmode
11359 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11360 target = gen_reg_rtx (tmode);
11362 /* In case the insn wants input operands in modes different from
11363 the result, abort. */
11364 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
11365 abort ();
11367 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11368 op0 = copy_to_mode_reg (mode0, op0);
11369 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11370 op1 = copy_to_mode_reg (mode1, op1);
11372 pat = GEN_FCN (icode) (target, op0, op1);
11373 if (! pat)
11374 return 0;
11375 emit_insn (pat);
11376 return target;
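/* From the user's side (a hypothetical usage sketch, not part of this
   file), a two-operand builtin handled by the routine above looks like

       typedef float __v4sf __attribute__ ((mode (V4SF)));

       __v4sf
       add4 (__v4sf a, __v4sf b)
       {
	 return __builtin_ia32_addps (a, b);
       }

   which expands through ix86_expand_binop_builtin into a single addps
   insn via CODE_FOR_addv4sf3.  */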
11379 /* Subroutine of ix86_expand_builtin to take care of stores. */
11381 static rtx
11382 ix86_expand_store_builtin (icode, arglist, shuffle)
11383 enum insn_code icode;
11384 tree arglist;
11385 int shuffle;
11387 rtx pat;
11388 tree arg0 = TREE_VALUE (arglist);
11389 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11390 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11391 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11392 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
11393 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
11395 if (VECTOR_MODE_P (mode1))
11396 op1 = safe_vector_operand (op1, mode1);
11398 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11399 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11400 op1 = copy_to_mode_reg (mode1, op1);
11401 if (shuffle >= 0)
11402 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
11403 pat = GEN_FCN (icode) (op0, op1);
11404 if (pat)
11405 emit_insn (pat);
11406 return 0;
11409 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
11411 static rtx
11412 ix86_expand_unop_builtin (icode, arglist, target, do_load)
11413 enum insn_code icode;
11414 tree arglist;
11415 rtx target;
11416 int do_load;
11418 rtx pat;
11419 tree arg0 = TREE_VALUE (arglist);
11420 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11421 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11422 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11424 if (! target
11425 || GET_MODE (target) != tmode
11426 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11427 target = gen_reg_rtx (tmode);
11428 if (do_load)
11429 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11430 else
11432 if (VECTOR_MODE_P (mode0))
11433 op0 = safe_vector_operand (op0, mode0);
11435 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11436 op0 = copy_to_mode_reg (mode0, op0);
11439 pat = GEN_FCN (icode) (target, op0);
11440 if (! pat)
11441 return 0;
11442 emit_insn (pat);
11443 return target;
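/* Usage note (a hedged sketch): with DO_LOAD nonzero the single
   argument is a pointer and the expander wraps it in a MEM, as for
   __builtin_ia32_loadaps taking a float *; with DO_LOAD zero the
   argument is already a vector value, as for __builtin_ia32_sqrtps.  */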
11446 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
11447 sqrtss, rsqrtss, rcpss. */
11449 static rtx
11450 ix86_expand_unop1_builtin (icode, arglist, target)
11451 enum insn_code icode;
11452 tree arglist;
11453 rtx target;
11455 rtx pat;
11456 tree arg0 = TREE_VALUE (arglist);
11457 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11458 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11459 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11461 if (! target
11462 || GET_MODE (target) != tmode
11463 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11464 target = gen_reg_rtx (tmode);
11466 if (VECTOR_MODE_P (mode0))
11467 op0 = safe_vector_operand (op0, mode0);
11469 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11470 op0 = copy_to_mode_reg (mode0, op0);
11472 pat = GEN_FCN (icode) (target, op0, op0);
11473 if (! pat)
11474 return 0;
11475 emit_insn (pat);
11476 return target;
11479 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
11481 static rtx
11482 ix86_expand_sse_compare (d, arglist, target)
11483 const struct builtin_description *d;
11484 tree arglist;
11485 rtx target;
11487 rtx pat;
11488 tree arg0 = TREE_VALUE (arglist);
11489 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11490 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11491 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11492 rtx op2;
11493 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
11494 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
11495 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
11496 enum rtx_code comparison = d->comparison;
11498 if (VECTOR_MODE_P (mode0))
11499 op0 = safe_vector_operand (op0, mode0);
11500 if (VECTOR_MODE_P (mode1))
11501 op1 = safe_vector_operand (op1, mode1);
11503 /* Swap operands if we have a comparison that isn't available in
11504 hardware. */
11505 if (d->flag)
11507 rtx tmp = gen_reg_rtx (mode1);
11508 emit_move_insn (tmp, op1);
11509 op1 = op0;
11510 op0 = tmp;
11513 if (! target
11514 || GET_MODE (target) != tmode
11515 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
11516 target = gen_reg_rtx (tmode);
11518 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
11519 op0 = copy_to_mode_reg (mode0, op0);
11520 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
11521 op1 = copy_to_mode_reg (mode1, op1);
11523 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11524 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
11525 if (! pat)
11526 return 0;
11527 emit_insn (pat);
11528 return target;
11531 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
11533 static rtx
11534 ix86_expand_sse_comi (d, arglist, target)
11535 const struct builtin_description *d;
11536 tree arglist;
11537 rtx target;
11539 rtx pat;
11540 tree arg0 = TREE_VALUE (arglist);
11541 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11542 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11543 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11544 rtx op2;
11545 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
11546 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
11547 enum rtx_code comparison = d->comparison;
11549 if (VECTOR_MODE_P (mode0))
11550 op0 = safe_vector_operand (op0, mode0);
11551 if (VECTOR_MODE_P (mode1))
11552 op1 = safe_vector_operand (op1, mode1);
11554 /* Swap operands if we have a comparison that isn't available in
11555 hardware. */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }
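  /* comiss/ucomiss only set EFLAGS.  Materialize the predicate by
     zeroing an SImode register and then filling its low byte with the
     setcc result, leaving the upper bits zero.  */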
11563 target = gen_reg_rtx (SImode);
11564 emit_move_insn (target, const0_rtx);
11565 target = gen_rtx_SUBREG (QImode, target, 0);
11567 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
11568 op0 = copy_to_mode_reg (mode0, op0);
11569 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
11570 op1 = copy_to_mode_reg (mode1, op1);
11572 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11573 pat = GEN_FCN (d->icode) (op0, op1, op2);
11574 if (! pat)
11575 return 0;
11576 emit_insn (pat);
11577 emit_insn (gen_setcc_2 (target, op2));
11579 return target;
11582 /* Expand an expression EXP that calls a built-in function,
11583 with result going to TARGET if that's convenient
11584 (and in mode MODE if that's convenient).
11585 SUBTARGET may be used as the target for computing one of EXP's operands.
11586 IGNORE is nonzero if the value is to be ignored. */
rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
11590 tree exp;
11591 rtx target;
11592 rtx subtarget ATTRIBUTE_UNUSED;
11593 enum machine_mode mode ATTRIBUTE_UNUSED;
11594 int ignore ATTRIBUTE_UNUSED;
11596 const struct builtin_description *d;
11597 size_t i;
11598 enum insn_code icode;
11599 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
11600 tree arglist = TREE_OPERAND (exp, 1);
11601 tree arg0, arg1, arg2, arg3;
11602 rtx op0, op1, op2, pat;
11603 enum machine_mode tmode, mode0, mode1, mode2;
11604 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11606 switch (fcode)
11608 case IX86_BUILTIN_EMMS:
11609 emit_insn (gen_emms ());
11610 return 0;
11612 case IX86_BUILTIN_SFENCE:
11613 emit_insn (gen_sfence ());
11614 return 0;
11616 case IX86_BUILTIN_M_FROM_INT:
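      /* The int argument is widened by writing it into the low SImode
	 half of a fresh DImode register, avoiding a trip through
	 memory; the upper half is left undefined.  */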
11617 target = gen_reg_rtx (DImode);
11618 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11619 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
11620 return target;
11622 case IX86_BUILTIN_M_TO_INT:
11623 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11624 op0 = copy_to_mode_reg (DImode, op0);
11625 target = gen_reg_rtx (SImode);
11626 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
11627 return target;
11629 case IX86_BUILTIN_PEXTRW:
11630 icode = CODE_FOR_mmx_pextrw;
11631 arg0 = TREE_VALUE (arglist);
11632 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11633 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11634 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11635 tmode = insn_data[icode].operand[0].mode;
11636 mode0 = insn_data[icode].operand[1].mode;
11637 mode1 = insn_data[icode].operand[2].mode;
11639 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11640 op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
11647 if (target == 0
11648 || GET_MODE (target) != tmode
11649 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11650 target = gen_reg_rtx (tmode);
11651 pat = GEN_FCN (icode) (target, op0, op1);
11652 if (! pat)
11653 return 0;
11654 emit_insn (pat);
11655 return target;
11657 case IX86_BUILTIN_PINSRW:
11658 icode = CODE_FOR_mmx_pinsrw;
11659 arg0 = TREE_VALUE (arglist);
11660 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11661 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11662 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11663 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11664 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11665 tmode = insn_data[icode].operand[0].mode;
11666 mode0 = insn_data[icode].operand[1].mode;
11667 mode1 = insn_data[icode].operand[2].mode;
11668 mode2 = insn_data[icode].operand[3].mode;
11670 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11671 op0 = copy_to_mode_reg (mode0, op0);
11672 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11673 op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
11680 if (target == 0
11681 || GET_MODE (target) != tmode
11682 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11683 target = gen_reg_rtx (tmode);
11684 pat = GEN_FCN (icode) (target, op0, op1, op2);
11685 if (! pat)
11686 return 0;
11687 emit_insn (pat);
11688 return target;
11690 case IX86_BUILTIN_MASKMOVQ:
11691 icode = CODE_FOR_mmx_maskmovq;
11692 /* Note the arg order is different from the operand order. */
11693 arg1 = TREE_VALUE (arglist);
11694 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
11695 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11696 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11697 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11698 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11699 mode0 = insn_data[icode].operand[0].mode;
11700 mode1 = insn_data[icode].operand[1].mode;
11701 mode2 = insn_data[icode].operand[2].mode;
      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
11704 op0 = copy_to_mode_reg (mode0, op0);
11705 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11706 op1 = copy_to_mode_reg (mode1, op1);
11707 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
11708 op2 = copy_to_mode_reg (mode2, op2);
11709 pat = GEN_FCN (icode) (op0, op1, op2);
11710 if (! pat)
11711 return 0;
11712 emit_insn (pat);
11713 return 0;
11715 case IX86_BUILTIN_SQRTSS:
11716 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
11717 case IX86_BUILTIN_RSQRTSS:
11718 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
11719 case IX86_BUILTIN_RCPSS:
11720 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
11722 case IX86_BUILTIN_LOADAPS:
11723 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
11725 case IX86_BUILTIN_LOADUPS:
11726 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
11728 case IX86_BUILTIN_STOREAPS:
11729 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
11730 case IX86_BUILTIN_STOREUPS:
11731 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
11733 case IX86_BUILTIN_LOADSS:
11734 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
11736 case IX86_BUILTIN_STORESS:
11737 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
11739 case IX86_BUILTIN_LOADHPS:
11740 case IX86_BUILTIN_LOADLPS:
11741 icode = (fcode == IX86_BUILTIN_LOADHPS
11742 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
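      /* movhps loads 64 bits from memory into the high half of the
	 destination and movlps into the low half; the other half is
	 copied from op0, hence the three-operand pattern below.  */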
11743 arg0 = TREE_VALUE (arglist);
11744 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11745 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11746 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11747 tmode = insn_data[icode].operand[0].mode;
11748 mode0 = insn_data[icode].operand[1].mode;
11749 mode1 = insn_data[icode].operand[2].mode;
11751 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11752 op0 = copy_to_mode_reg (mode0, op0);
11753 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
11754 if (target == 0
11755 || GET_MODE (target) != tmode
11756 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11757 target = gen_reg_rtx (tmode);
11758 pat = GEN_FCN (icode) (target, op0, op1);
11759 if (! pat)
11760 return 0;
11761 emit_insn (pat);
11762 return target;
11764 case IX86_BUILTIN_STOREHPS:
11765 case IX86_BUILTIN_STORELPS:
11766 icode = (fcode == IX86_BUILTIN_STOREHPS
11767 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11768 arg0 = TREE_VALUE (arglist);
11769 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11770 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11771 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11772 mode0 = insn_data[icode].operand[1].mode;
11773 mode1 = insn_data[icode].operand[2].mode;
11775 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11776 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11777 op1 = copy_to_mode_reg (mode1, op1);
11779 pat = GEN_FCN (icode) (op0, op0, op1);
11780 if (! pat)
11781 return 0;
11782 emit_insn (pat);
11783 return 0;
11785 case IX86_BUILTIN_MOVNTPS:
11786 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
11787 case IX86_BUILTIN_MOVNTQ:
11788 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
11790 case IX86_BUILTIN_LDMXCSR:
11791 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11792 target = assign_386_stack_local (SImode, 0);
11793 emit_move_insn (target, op0);
11794 emit_insn (gen_ldmxcsr (target));
11795 return 0;
11797 case IX86_BUILTIN_STMXCSR:
11798 target = assign_386_stack_local (SImode, 0);
11799 emit_insn (gen_stmxcsr (target));
11800 return copy_to_mode_reg (SImode, target);
11802 case IX86_BUILTIN_PREFETCH:
11803 icode = CODE_FOR_prefetch;
11804 arg0 = TREE_VALUE (arglist);
11805 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11806 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11807 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11808 mode0 = insn_data[icode].operand[0].mode;
11809 mode1 = insn_data[icode].operand[1].mode;
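      /* The prefetch hint selects among prefetchnta/t0/t1/t2 and is
	 encoded in the opcode itself, so it must be a compile-time
	 constant rather than a run-time value.  */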
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
11818 op0 = copy_to_mode_reg (Pmode, op0);
11819 pat = GEN_FCN (icode) (op0, op1);
11820 if (! pat)
11821 return 0;
11822 emit_insn (pat);
11823 return target;
11825 case IX86_BUILTIN_SHUFPS:
11826 icode = CODE_FOR_sse_shufps;
11827 arg0 = TREE_VALUE (arglist);
11828 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11829 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11830 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11831 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11832 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11833 tmode = insn_data[icode].operand[0].mode;
11834 mode0 = insn_data[icode].operand[1].mode;
11835 mode1 = insn_data[icode].operand[2].mode;
11836 mode2 = insn_data[icode].operand[3].mode;
11838 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11839 op0 = copy_to_mode_reg (mode0, op0);
11840 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11841 op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
11848 if (target == 0
11849 || GET_MODE (target) != tmode
11850 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11851 target = gen_reg_rtx (tmode);
11852 pat = GEN_FCN (icode) (target, op0, op1, op2);
11853 if (! pat)
11854 return 0;
11855 emit_insn (pat);
11856 return target;
11858 case IX86_BUILTIN_PSHUFW:
11859 icode = CODE_FOR_mmx_pshufw;
11860 arg0 = TREE_VALUE (arglist);
11861 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11862 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11863 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11864 tmode = insn_data[icode].operand[0].mode;
11865 mode0 = insn_data[icode].operand[2].mode;
11866 mode1 = insn_data[icode].operand[3].mode;
      if (! (*insn_data[icode].operand[2].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
11876 if (target == 0
11877 || GET_MODE (target) != tmode
11878 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11879 target = gen_reg_rtx (tmode);
11880 pat = GEN_FCN (icode) (target, target, op0, op1);
11881 if (! pat)
11882 return 0;
11883 emit_insn (pat);
11884 return target;
11886 case IX86_BUILTIN_FEMMS:
11887 emit_insn (gen_femms ());
11888 return NULL_RTX;
11890 case IX86_BUILTIN_PAVGUSB:
11891 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
11893 case IX86_BUILTIN_PF2ID:
11894 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
11896 case IX86_BUILTIN_PFACC:
11897 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
11899 case IX86_BUILTIN_PFADD:
11900 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
11902 case IX86_BUILTIN_PFCMPEQ:
11903 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
11905 case IX86_BUILTIN_PFCMPGE:
11906 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
11908 case IX86_BUILTIN_PFCMPGT:
11909 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
11911 case IX86_BUILTIN_PFMAX:
11912 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
11914 case IX86_BUILTIN_PFMIN:
11915 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
11917 case IX86_BUILTIN_PFMUL:
11918 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
11920 case IX86_BUILTIN_PFRCP:
11921 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
11923 case IX86_BUILTIN_PFRCPIT1:
11924 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
11926 case IX86_BUILTIN_PFRCPIT2:
11927 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
11929 case IX86_BUILTIN_PFRSQIT1:
11930 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
11932 case IX86_BUILTIN_PFRSQRT:
11933 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
11935 case IX86_BUILTIN_PFSUB:
11936 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
11938 case IX86_BUILTIN_PFSUBR:
11939 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
11941 case IX86_BUILTIN_PI2FD:
11942 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
11944 case IX86_BUILTIN_PMULHRW:
11945 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
11947 case IX86_BUILTIN_PREFETCH_3DNOW:
11948 icode = CODE_FOR_prefetch_3dnow;
11949 arg0 = TREE_VALUE (arglist);
11950 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11951 mode0 = insn_data[icode].operand[0].mode;
11952 pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
11953 if (! pat)
11954 return NULL_RTX;
11955 emit_insn (pat);
11956 return NULL_RTX;
11958 case IX86_BUILTIN_PREFETCHW:
11959 icode = CODE_FOR_prefetchw;
11960 arg0 = TREE_VALUE (arglist);
11961 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11962 mode0 = insn_data[icode].operand[0].mode;
11963 pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
11964 if (! pat)
11965 return NULL_RTX;
11966 emit_insn (pat);
11967 return NULL_RTX;
11969 case IX86_BUILTIN_PF2IW:
11970 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
11972 case IX86_BUILTIN_PFNACC:
11973 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
11975 case IX86_BUILTIN_PFPNACC:
11976 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
11978 case IX86_BUILTIN_PI2FW:
11979 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
11981 case IX86_BUILTIN_PSWAPDSI:
11982 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
11984 case IX86_BUILTIN_PSWAPDSF:
11985 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
11987 /* Composite intrinsics. */
11988 case IX86_BUILTIN_SETPS1:
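      /* There is no single "set1" instruction: bounce the scalar
	 through a stack slot, load it into element 0 with loadss, then
	 broadcast it with shufps and a zero mask (each 2-bit field of
	 the mask selects element 0).  */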
11989 target = assign_386_stack_local (SFmode, 0);
11990 arg0 = TREE_VALUE (arglist);
11991 emit_move_insn (adjust_address (target, SFmode, 0),
11992 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
11993 op0 = gen_reg_rtx (V4SFmode);
11994 emit_insn (gen_sse_loadss (op0, adjust_address (target, V4SFmode, 0)));
11995 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
11996 return op0;
11998 case IX86_BUILTIN_SETPS:
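      /* Build the vector in a stack slot with four SFmode stores at
	 byte offsets 0, 4, 8 and 12, then load the whole slot into a
	 register with movaps.  */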
11999 target = assign_386_stack_local (V4SFmode, 0);
12000 arg0 = TREE_VALUE (arglist);
12001 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12002 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12003 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
12004 emit_move_insn (adjust_address (target, SFmode, 0),
12005 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
12006 emit_move_insn (adjust_address (target, SFmode, 4),
12007 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
12008 emit_move_insn (adjust_address (target, SFmode, 8),
12009 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
12010 emit_move_insn (adjust_address (target, SFmode, 12),
12011 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
12012 op0 = gen_reg_rtx (V4SFmode);
12013 emit_insn (gen_sse_movaps (op0, target));
12014 return op0;
12016 case IX86_BUILTIN_CLRPS:
12017 target = gen_reg_rtx (TImode);
12018 emit_insn (gen_sse_clrti (target));
12019 return target;
12021 case IX86_BUILTIN_LOADRPS:
12022 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
12023 gen_reg_rtx (V4SFmode), 1);
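      /* Mask 0x1b is binary 00 01 10 11: each 2-bit field selects the
	 source element for one destination slot, so this reverses the
	 four floats just loaded.  */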
12024 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
12025 return target;
12027 case IX86_BUILTIN_LOADPS1:
12028 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
12029 gen_reg_rtx (V4SFmode), 1);
12030 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
12031 return target;
12033 case IX86_BUILTIN_STOREPS1:
12034 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
12035 case IX86_BUILTIN_STORERPS:
12036 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
12038 case IX86_BUILTIN_MMX_ZERO:
12039 target = gen_reg_rtx (DImode);
12040 emit_insn (gen_mmx_clrdi (target));
12041 return target;
12043 default:
12044 break;
12047 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
12048 if (d->code == fcode)
12050 /* Compares are treated specially. */
12051 if (d->icode == CODE_FOR_maskcmpv4sf3
12052 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12053 || d->icode == CODE_FOR_maskncmpv4sf3
12054 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12055 return ix86_expand_sse_compare (d, arglist, target);
12057 return ix86_expand_binop_builtin (d->icode, arglist, target);
12060 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
12061 if (d->code == fcode)
12062 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
12064 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
12065 if (d->code == fcode)
12066 return ix86_expand_sse_comi (d, arglist, target);
12068 /* @@@ Should really do something sensible here. */
12069 return 0;
/* Store OPERAND in memory after reload has completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (mode, operand)
12076 enum machine_mode mode;
12077 rtx operand;
12079 rtx result;
12080 if (!reload_completed)
12081 abort ();
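  /* On 64-bit targets with a red zone, the 128 bytes below the stack
     pointer may be written without adjusting %rsp, so the scratch slot
     can simply live at sp - RED_ZONE_SIZE.  Otherwise the operand is
     pushed onto the stack.  */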
12082 if (TARGET_64BIT && TARGET_RED_ZONE)
12084 result = gen_rtx_MEM (mode,
12085 gen_rtx_PLUS (Pmode,
12086 stack_pointer_rtx,
12087 GEN_INT (-RED_ZONE_SIZE)));
12088 emit_move_insn (result, operand);
12090 else if (TARGET_64BIT && !TARGET_RED_ZONE)
12092 switch (mode)
12094 case HImode:
12095 case SImode:
12096 operand = gen_lowpart (DImode, operand);
12097 /* FALLTHRU */
12098 case DImode:
12099 emit_insn (
12100 gen_rtx_SET (VOIDmode,
12101 gen_rtx_MEM (DImode,
12102 gen_rtx_PRE_DEC (DImode,
12103 stack_pointer_rtx)),
12104 operand));
12105 break;
12106 default:
12107 abort ();
12109 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12111 else
12113 switch (mode)
12115 case DImode:
12117 rtx operands[2];
12118 split_di (&operand, 1, operands, operands + 1);
12119 emit_insn (
12120 gen_rtx_SET (VOIDmode,
12121 gen_rtx_MEM (SImode,
12122 gen_rtx_PRE_DEC (Pmode,
12123 stack_pointer_rtx)),
12124 operands[1]));
12125 emit_insn (
12126 gen_rtx_SET (VOIDmode,
12127 gen_rtx_MEM (SImode,
12128 gen_rtx_PRE_DEC (Pmode,
12129 stack_pointer_rtx)),
12130 operands[0]));
12132 break;
12133 case HImode:
	  /* It is better to store HImode values as SImode.  */
12135 if (!TARGET_PARTIAL_REG_STALL)
12136 operand = gen_lowpart (SImode, operand);
12137 /* FALLTHRU */
12138 case SImode:
12139 emit_insn (
12140 gen_rtx_SET (VOIDmode,
12141 gen_rtx_MEM (GET_MODE (operand),
12142 gen_rtx_PRE_DEC (SImode,
12143 stack_pointer_rtx)),
12144 operand));
12145 break;
12146 default:
12147 abort ();
12149 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12151 return result;
/* Free the operand from memory.  */
12155 void
12156 ix86_free_from_memory (mode)
12157 enum machine_mode mode;
12159 if (!TARGET_64BIT || !TARGET_RED_ZONE)
12161 int size;
12163 if (mode == DImode || TARGET_64BIT)
12164 size = 8;
12165 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
12166 size = 2;
12167 else
12168 size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be
	 converted to a pop or add instruction if registers are
	 available.  */
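      /* For a 4-byte slot this emits "leal 4(%esp), %esp"; unlike an
	 add, lea also leaves the condition flags untouched.  */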
12171 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12172 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12173 GEN_INT (size))));
/* Put a float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and movdf to do
   mem-to-mem moves through integer regs.  */
12181 enum reg_class
12182 ix86_preferred_reload_class (x, class)
12183 rtx x;
12184 enum reg_class class;
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
	return NO_REGS;
      /* Floats can load 0 and 1 (via fldz and fld1).  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
	{
	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
	  if (MAYBE_SSE_CLASS_P (class))
	    return (reg_class_subset_p (class, GENERAL_REGS)
		    ? GENERAL_REGS : FLOAT_REGS);
	  else
	    return class;
	}
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
	return NO_REGS;
    }
12208 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
12209 return NO_REGS;
12210 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
12211 return Q_REGS;
12212 return class;
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We
   avoid this by never combining those units in a single alternative in
   the machine description.  Ensure that this constraint holds to avoid
   unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST,
   so do not enforce these sanity checks.  */
int
ix86_secondary_memory_needed (class1, class2, mode, strict)
12227 enum reg_class class1, class2;
12228 enum machine_mode mode;
12229 int strict;
12231 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
12232 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
12233 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
12234 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
12235 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
12236 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
	abort ();
      else
	return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
	  || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
	      && mode != SImode)
	  || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
	      && mode != SImode));
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when CLASS1 is the
   same as CLASS2; on some machines it is expensive to move between
   registers if they are not general registers.  */
int
ix86_register_move_cost (mode, class1, class2)
12257 enum machine_mode mode;
12258 enum reg_class class1, class2;
  /* In case we require secondary memory, compute the cost of the store
     followed by the load.  When copying from a general purpose register
     we may emit multiple stores followed by a single load, causing a
     memory size mismatch stall.  Count this as an arbitrarily high cost
     of 20.  */
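  /* For example, a DImode copy between a general register pair and an
     MMX register goes through memory as two 4-byte stores followed by
     one 8-byte load - exactly the mismatched-size case penalized
     here.  */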
12264 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
12266 int add_cost = 0;
12267 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
12268 add_cost = 20;
12269 return (MEMORY_MOVE_COST (mode, class1, 0)
12270 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
12272 /* Moves between SSE/MMX and integer unit are expensive. */
12273 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12274 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
12275 return ix86_cost->mmxsse_to_integer;
12276 if (MAYBE_FLOAT_CLASS_P (class1))
12277 return ix86_cost->fp_move;
12278 if (MAYBE_SSE_CLASS_P (class1))
12279 return ix86_cost->sse_move;
12280 if (MAYBE_MMX_CLASS_P (class1))
12281 return ix86_cost->mmx_move;
12282 return 2;
/* Return 1 if hard register REGNO can hold a value of machine mode MODE.  */
int
ix86_hard_regno_mode_ok (regno, mode)
12288 int regno;
12289 enum machine_mode mode;
  /* Flags, and only flags, can hold CCmode values.  */
12292 if (CC_REGNO_P (regno))
12293 return GET_MODE_CLASS (mode) == MODE_CC;
12294 if (GET_MODE_CLASS (mode) == MODE_CC
12295 || GET_MODE_CLASS (mode) == MODE_RANDOM
12296 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
12297 return 0;
12298 if (FP_REGNO_P (regno))
12299 return VALID_FP_MODE_P (mode);
12300 if (SSE_REGNO_P (regno))
12301 return VALID_SSE_REG_MODE (mode);
12302 if (MMX_REGNO_P (regno))
12303 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
  /* We handle both integers and floats in the general purpose registers.
     In the future we should be able to handle vector modes as well.  */
12306 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
12307 return 0;
  /* Take care of QImode values - they can live in non-QI regs, but then
     they do cause partial register stalls.  */
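  /* Only %eax, %ebx, %ecx and %edx (regno 0-3) have byte subregisters
     in 32-bit mode; the REX encodings on x86-64 lift this
     restriction.  */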
12310 if (regno < 4 || mode != QImode || TARGET_64BIT)
12311 return 1;
12312 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
/* Return the cost of moving data of mode MODE between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Also model the increased moving costs of QImode registers in
   non-Q_REGS classes.  */
int
ix86_memory_move_cost (mode, class, in)
12328 enum machine_mode mode;
12329 enum reg_class class;
12330 int in;
12332 if (FLOAT_CLASS_P (class))
12334 int index;
12335 switch (mode)
12337 case SFmode:
12338 index = 0;
12339 break;
12340 case DFmode:
12341 index = 1;
12342 break;
12343 case XFmode:
12344 case TFmode:
12345 index = 2;
12346 break;
12347 default:
12348 return 100;
12350 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
12352 if (SSE_CLASS_P (class))
12354 int index;
12355 switch (GET_MODE_SIZE (mode))
12357 case 4:
12358 index = 0;
12359 break;
12360 case 8:
12361 index = 1;
12362 break;
12363 case 16:
12364 index = 2;
12365 break;
12366 default:
12367 return 100;
12369 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
12371 if (MMX_CLASS_P (class))
12373 int index;
12374 switch (GET_MODE_SIZE (mode))
12376 case 4:
12377 index = 0;
12378 break;
12379 case 8:
12380 index = 1;
12381 break;
12382 default:
12383 return 100;
12385 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
12387 switch (GET_MODE_SIZE (mode))
12389 case 1:
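      /* Byte loads into non-Q classes are done with movzbl, and byte
	 stores need the value in a Q register (%al ... %bh), so non-Q
	 classes are charged an extra 4 on the store.  */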
12390 if (in)
12391 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
12392 : ix86_cost->movzbl_load);
12393 else
12394 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
12395 : ix86_cost->int_store[0] + 4);
12396 break;
12397 case 2:
12398 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
12399 default:
      /* Compute the number of 32-bit moves needed.  TFmode is moved
	 as XFmode.  */
12401 if (mode == TFmode)
12402 mode = XFmode;
12403 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
12404 * (int) GET_MODE_SIZE (mode) / 4);
12408 #ifdef DO_GLOBAL_CTORS_BODY
12409 static void
12410 ix86_svr3_asm_out_constructor (symbol, priority)
12411 rtx symbol;
12412 int priority ATTRIBUTE_UNUSED;
12414 init_section ();
12415 fputs ("\tpushl $", asm_out_file);
12416 assemble_name (asm_out_file, XSTR (symbol, 0));
12417 fputc ('\n', asm_out_file);
12419 #endif
12421 #if defined(TARGET_ELF) && defined(TARGET_COFF)
12422 static void
12423 sco_asm_named_section (name, flags)
12424 const char *name;
12425 unsigned int flags;
12427 if (TARGET_ELF)
12428 default_elf_asm_named_section (name, flags);
12429 else
12430 default_coff_asm_named_section (name, flags);
12433 static void
12434 sco_asm_out_constructor (symbol, priority)
12435 rtx symbol;
12436 int priority;
12438 if (TARGET_ELF)
    default_named_section_asm_out_constructor (symbol, priority);
12440 else
12441 ix86_svr3_asm_out_constructor (symbol, priority);
12443 #endif