/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
   Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,			/* cost of an add instruction */
  3,			/* cost of a lea instruction */
  2,			/* variable shift costs */
  3,			/* constant shift costs */
  3,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  3,			/* cost of a divide/mod */
  3,			/* cost of movsx */
  3,			/* cost of movzx */
  0,			/* "large" insn */
  2,			/* MOVE_RATIO */
  2,			/* cost for loading QImode using movzbl */
  {2, 2, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 2},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {2, 2, 2},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  3,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers
			   in SImode and DImode */
  {3, 3},		/* cost of storing MMX registers
			   in SImode and DImode */
  3,			/* cost of moving SSE register */
  {3, 3, 3},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {3, 3, 3},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  6,			/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  23,			/* cost of a divide/mod */
  3,			/* cost of movsx */
  2,			/* cost of movzx */
  15,			/* "large" insn */
  3,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
};
static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  12,			/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  40,			/* cost of a divide/mod */
  3,			/* cost of movsx */
  2,			/* cost of movzx */
  15,			/* "large" insn */
  3,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
};
static const
struct processor_costs pentium_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  4,			/* variable shift costs */
  1,			/* constant shift costs */
  11,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  25,			/* cost of a divide/mod */
  3,			/* cost of movsx */
  2,			/* cost of movzx */
  8,			/* "large" insn */
  6,			/* MOVE_RATIO */
  6,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  8,			/* cost of moving MMX register */
  {8, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {8, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
};
static const
struct processor_costs pentiumpro_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  4,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  17,			/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  8,			/* "large" insn */
  6,			/* MOVE_RATIO */
  2,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  32,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
};
static const
struct processor_costs k6_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  3,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  18,			/* cost of a divide/mod */
  2,			/* cost of movsx */
  2,			/* cost of movzx */
  8,			/* "large" insn */
  4,			/* MOVE_RATIO */
  3,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  6,			/* MMX or SSE register to integer */
  32,			/* size of prefetch block */
  1,			/* number of parallel prefetches */
};
static const
struct processor_costs athlon_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  5,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  42,			/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  8,			/* "large" insn */
  9,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 20},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 16},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  6,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
};
static const
struct processor_costs pentium4_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  8,			/* variable shift costs */
  8,			/* constant shift costs */
  30,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  112,			/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  16,			/* "large" insn */
  6,			/* MOVE_RATIO */
  2,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  12,			/* cost of moving SSE register */
  {12, 12, 12},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  10,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
};

const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)
#define m_PENT4  (1<<PROCESSOR_PENTIUM4)
const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_3dnow_a = m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
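
/* Usage note (illustrative, not part of the original source): each
   bitmask above is tested against the mask of the CPU being compiled
   for, e.g.

     if (x86_use_leave & (1 << ix86_cpu))
       ... emit "leave" in the epilogue ...

   i386.h wraps such tests in TARGET_* convenience macros; see the test
   of x86_accumulate_outgoing_args & CPUMASK in override_options below.  */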
/* If the average insn count for a single function invocation is
   lower than this constant, emit a fast (but longer) prologue and
   epilogue sequence.  */
#define FAST_PROLOGUE_INSN_COUNT 30

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))

static const char *const hi_reg_name[] = HI_REGISTER_NAMES;		/* names for 16 bit regs */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;		/* names for 8 bit regs (low) */
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;	/* names for 8 bit regs (high) */
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
static int x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
						1 /*RDX*/, 2 /*RCX*/,
						FIRST_REX_INT_REG /*R8 */,
						FIRST_REX_INT_REG + 1 /*R9 */};
static int x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDX*/, 5, 4};
/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)

   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8 for %eip    (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)

   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.

   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to
   understand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)

   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.

   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)  */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;
#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
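
/* Worked example (illustrative, not part of the original source): with
   the usual x86-64 values REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8,
   this evaluates to 6 * 8 + 8 * 16 == 176 bytes, the size of the
   varargs register save area prescribed by the x86-64 ABI.  */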
/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
			)
   [va_arg registers]  (
			> to_allocate	      <- FRAME_POINTER
   [frame]	       (
			)
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;

/* Which cpu we are scheduling for.  */
enum processor_type ix86_cpu;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* Number of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* True if the SSE prefetch instruction is not a NOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number.  */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					  rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_free_machine_status PARAMS ((struct function *));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
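
/* Illustrative example (not part of the original source): the ia32
   address 12(%ebx,%ecx,4), i.e. base + index*scale + disp, decomposes
   into base = %ebx, index = %ecx, scale = 4, disp = 12.  */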

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
					 tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
					    tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					   rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static int ix86_save_reg PARAMS ((int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
const struct attribute_spec ix86_attribute_table[];
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));

#ifdef DO_GLOBAL_CTORS_BODY
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
{
  X86_64_NO_CLASS,
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
  X86_64_SSE_CLASS,
  X86_64_SSESF_CLASS,
  X86_64_SSEDF_CLASS,
  X86_64_SSEUP_CLASS,
  X86_64_X87_CLASS,
  X86_64_X87UP_CLASS,
  X86_64_MEMORY_CLASS
};
const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
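
/* Illustrative example (assuming the psABI rules implemented below):
   struct { double d; long l; } occupies two 8-byte words; the first
   word is classified SSEDF and the second INTEGER, so the struct
   travels in one SSE register and one integer register.  */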

#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
static void ix86_osf_output_function_prologue PARAMS ((FILE *,
						       HOST_WIDE_INT));
#  undef TARGET_ASM_FUNCTION_PROLOGUE
#  define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder

struct gcc_target targetm = TARGET_INITIALIZER;

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
      const int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
      {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_MMX = 4,
	  PTA_PREFETCH_SSE = 8,
	  PTA_3DNOW = 16,
	  PTA_3DNOW_A = 64
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
				       PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
    };

  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_cpu_string && ix86_arch_string)
    ix86_cpu_string = ix86_arch_string;
  if (!ix86_cpu_string)
    ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_cpu = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags & MASK_MMX_SET))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags & MASK_3DNOW_SET))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags & MASK_3DNOW_A_SET))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags & MASK_SSE_SET))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags & MASK_SSE2_SET))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
      {
	ix86_cpu = processor_alias_table[i].processor;
	break;
      }
  if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
    x86_prefetch_sse = true;
  if (i == pta_size)
    error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;
  free_machine_status = ix86_free_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
#define abs(n) (n < 0 ? -n : n)
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_cpu].align_loop;
      align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_cpu].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
    }
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_cpu].align_func;

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 64 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 3 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
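
  /* Worked example (illustrative): -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT == 16 * 8 == 128 bits, i.e. the 16-byte
     alignment needed for SSE __m128 values.  */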

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");
      /* Enable by default the SSE and MMX builtins.  */
      target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
      ix86_fpmath = FPMATH_SSE;
    }
  else
    ix86_fpmath = FPMATH_387;

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetic");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetic");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning ("387 instruction set disabled, using SSE arithmetic");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    target_flags |= MASK_MMX;

  /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow.  */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
	 extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
	target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & CPUMASK)
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
}

void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
  if (TARGET_64BIT && optimize >= 1)
    flag_omit_frame_pointer = 1;
  if (TARGET_64BIT)
    {
      flag_pcc_struct_return = 0;
      flag_asynchronous_unwind_tables = 1;
    }
}

/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
#endif
  { NULL, 0, 0, false, false, false, NULL }
};
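
/* Illustrative usage (not part of the original source):

     int __attribute__ ((stdcall)) f (int a, int b);
     int __attribute__ ((regparm (3))) g (int a, int b, int c);

   The handlers below validate such attributes when the front end
   parses them.  */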

/* Handle a "cdecl" or "stdcall" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args ATTRIBUTE_UNUSED;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  if (TARGET_64BIT)
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle a "regparm" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning ("`%s' attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning ("argument to `%s' attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}

#if defined (OSF_OS) || defined (TARGET_OSF1ELF)

/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int: how many units of
   temporary storage to allocate.

   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.

   We override it here to allow for the new profiling code to go before
   the prologue and the old mcount code to go after the prologue (and
   after %ebx has been set up for ELF shared library support).  */

static void
ix86_osf_output_function_prologue (file, size)
     FILE *file;
     HOST_WIDE_INT size;
{
  const char *prefix = "";
  const char *const lprefix = LPREFIX;
  int labelno = profile_label_no;

#ifdef OSF_OS

  if (TARGET_UNDERSCORES)
    prefix = "_";

  if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic && !HALF_PIC_P ())
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else if (HALF_PIC_P ())
	{
	  rtx symref;

	  HALF_PIC_EXTERNAL ("_mcount_ptr");
	  symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
						     "_mcount_ptr"));

	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
		   XSTR (symref, 0));
	  fprintf (file, "\tcall *(%%eax)\n");
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }

#else  /* !OSF_OS */

  if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic)
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }
#endif /* !OSF_OS */

  function_prologue (file, size);
}

#endif /* OSF_OS || TARGET_OSF1ELF */

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched calling conventions (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}

/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
    {
      /* Stdcall functions will pop the stack if not variable args.  */
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
	rtd = 1;

      if (rtd
	  && (TYPE_ARG_TYPES (funtype) == NULL_TREE
	      || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		  == void_type_node)))
	return size;
    }

  /* Lose any fake structure return argument.  */
  if (aggregate_value_p (TREE_TYPE (funtype))
      && !TARGET_64BIT)
    return GET_MODE_SIZE (Pmode);

  return 0;
}
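
/* Worked example (illustrative): for
     void __attribute__ ((stdcall)) f (int a, int b);
   SIZE is 8 and the argument list is fixed, so 8 is returned and the
   callee pops its own arguments with `ret $8'.  A cdecl function
   returns 0 here, or GET_MODE_SIZE (Pmode) == 4 on ia32 when a fake
   structure-return pointer must be popped.  */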

/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */
bool
ix86_function_arg_regno_p (regno)
     int regno;
{
  int i;
  if (!TARGET_64BIT)
    return regno < REGPARM_MAX || (TARGET_SSE && SSE_REGNO_P (regno));
  if (SSE_REGNO_P (regno) && TARGET_SSE)
    return true;
  /* RAX is used as hidden argument to va_arg functions.  */
  if (!regno)
    return true;
  for (i = 0; i < REGPARM_MAX; i++)
    if (regno == x86_64_int_parameter_registers[i])
      return true;
  return false;
}

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  cum->sse_nregs = SSE_REGPARM_MAX;
  if (fntype && !TARGET_64BIT)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }
  cum->maybe_vaarg = false;

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers.  */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    {
	      if (!TARGET_64BIT)
		cum->nregs = 0;
	      cum->maybe_vaarg = true;
	    }
	}
    }
  if ((!fntype && !libname)
      || (fntype && !TYPE_ARG_TYPES (fntype)))
    cum->maybe_vaarg = 1;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}

/* x86-64 register passing implementation.  See the x86-64 ABI for details.
   The goal of this code is to classify each 8 bytes of the incoming argument
   by register class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (class1, class2)
     enum x86_64_reg_class class1, class2;
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
      || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
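
/* Worked example (illustrative): in struct { int i; float f; } both
   fields share one 8-byte word, so X86_64_INTEGERSI_CLASS is merged
   with X86_64_SSESF_CLASS; rule #4 yields X86_64_INTEGERSI_CLASS and
   the whole struct is passed in a single integer register.  */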
1592 /* Classify the argument of type TYPE and mode MODE.
1593 CLASSES will be filled by the register class used to pass each word
1594 of the operand. The number of words is returned. In case the parameter
1595 should be passed in memory, 0 is returned. As a special case for zero
1596 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1598 BIT_OFFSET is used internally for handling records and specifies the
1599 offset in bits modulo 256 to avoid overflow cases.
1601 See the x86-64 PS ABI for details.
1604 static int
1605 classify_argument (mode, type, classes, bit_offset)
1606 enum machine_mode mode;
1607 tree type;
1608 enum x86_64_reg_class classes[MAX_CLASSES];
1609 int bit_offset;
1611 int bytes =
1612 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1613 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1615 if (type && AGGREGATE_TYPE_P (type))
1617 int i;
1618 tree field;
1619 enum x86_64_reg_class subclasses[MAX_CLASSES];
1621 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1622 if (bytes > 16)
1623 return 0;
1625 for (i = 0; i < words; i++)
1626 classes[i] = X86_64_NO_CLASS;
1628 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1629 signal the memory class, so handle this as a special case. */
1630 if (!words)
1632 classes[0] = X86_64_NO_CLASS;
1633 return 1;
1636 /* Classify each field of the record and merge classes. */
1637 if (TREE_CODE (type) == RECORD_TYPE)
1639 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1641 if (TREE_CODE (field) == FIELD_DECL)
1643 int num;
1645 /* Bitfields are always classified as integer. Handle them
1646 early, since later code would consider them to be
1647 misaligned integers. */
1648 if (DECL_BIT_FIELD (field))
1650 for (i = int_bit_position (field) / 8 / 8;
1651 i < (int_bit_position (field)
1652 + tree_low_cst (DECL_SIZE (field), 0)
1653 + 63) / 8 / 8; i++)
1654 classes[i] =
1655 merge_classes (X86_64_INTEGER_CLASS,
1656 classes[i]);
1658 else
1660 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1661 TREE_TYPE (field), subclasses,
1662 (int_bit_position (field)
1663 + bit_offset) % 256);
1664 if (!num)
1665 return 0;
1666 for (i = 0; i < num; i++)
1668 int pos =
1669 (int_bit_position (field) + bit_offset) / 8 / 8;
1670 classes[i + pos] =
1671 merge_classes (subclasses[i], classes[i + pos]);
1677 /* Arrays are handled as small records. */
1678 else if (TREE_CODE (type) == ARRAY_TYPE)
1680 int num;
1681 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1682 TREE_TYPE (type), subclasses, bit_offset);
1683 if (!num)
1684 return 0;
1686 /* The partial classes are now full classes. */
1687 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1688 subclasses[0] = X86_64_SSE_CLASS;
1689 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1690 subclasses[0] = X86_64_INTEGER_CLASS;
1692 for (i = 0; i < words; i++)
1693 classes[i] = subclasses[i % num];
1695 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
1696 else if (TREE_CODE (type) == UNION_TYPE)
1698 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1700 if (TREE_CODE (field) == FIELD_DECL)
1702 int num;
1703 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1704 TREE_TYPE (field), subclasses,
1705 bit_offset);
1706 if (!num)
1707 return 0;
1708 for (i = 0; i < num; i++)
1709 classes[i] = merge_classes (subclasses[i], classes[i]);
1713 else
1714 abort ();
1716 /* Final merger cleanup. */
1717 for (i = 0; i < words; i++)
1719 /* If one class is MEMORY, everything should be passed in
1720 memory. */
1721 if (classes[i] == X86_64_MEMORY_CLASS)
1722 return 0;
1724 /* The X86_64_SSEUP_CLASS should always be preceded by
1725 X86_64_SSE_CLASS. */
1726 if (classes[i] == X86_64_SSEUP_CLASS
1727 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1728 classes[i] = X86_64_SSE_CLASS;
1730 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1731 if (classes[i] == X86_64_X87UP_CLASS
1732 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1733 classes[i] = X86_64_SSE_CLASS;
1735 return words;
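/* Example of the aggregate path above (editor's sketch): for

     struct s { double d; int i; };

   the first 8-byte word classifies as X86_64_SSEDF_CLASS and the second
   as X86_64_INTEGER_CLASS, so classify_argument returns 2 and the
   structure travels in one SSE register and one general register.  */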
1738 /* Compute the alignment needed. We align all types to their natural
1739 boundaries, with the exception of XFmode, which is aligned to 128 bits. */
1740 if (mode != VOIDmode && mode != BLKmode)
1742 int mode_alignment = GET_MODE_BITSIZE (mode);
1744 if (mode == XFmode)
1745 mode_alignment = 128;
1746 else if (mode == XCmode)
1747 mode_alignment = 256;
1748 /* Misaligned fields are always returned in memory. */
1749 if (bit_offset % mode_alignment)
1750 return 0;
1753 /* Classification of atomic types. */
1754 switch (mode)
1756 case DImode:
1757 case SImode:
1758 case HImode:
1759 case QImode:
1760 case CSImode:
1761 case CHImode:
1762 case CQImode:
1763 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1764 classes[0] = X86_64_INTEGERSI_CLASS;
1765 else
1766 classes[0] = X86_64_INTEGER_CLASS;
1767 return 1;
1768 case CDImode:
1769 case TImode:
1770 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1771 return 2;
1772 case CTImode:
1773 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1774 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1775 return 4;
1776 case SFmode:
1777 if (!(bit_offset % 64))
1778 classes[0] = X86_64_SSESF_CLASS;
1779 else
1780 classes[0] = X86_64_SSE_CLASS;
1781 return 1;
1782 case DFmode:
1783 classes[0] = X86_64_SSEDF_CLASS;
1784 return 1;
1785 case TFmode:
1786 classes[0] = X86_64_X87_CLASS;
1787 classes[1] = X86_64_X87UP_CLASS;
1788 return 2;
1789 case TCmode:
1790 classes[0] = X86_64_X87_CLASS;
1791 classes[1] = X86_64_X87UP_CLASS;
1792 classes[2] = X86_64_X87_CLASS;
1793 classes[3] = X86_64_X87UP_CLASS;
1794 return 4;
1795 case DCmode:
1796 classes[0] = X86_64_SSEDF_CLASS;
1797 classes[1] = X86_64_SSEDF_CLASS;
1798 return 2;
1799 case SCmode:
1800 classes[0] = X86_64_SSE_CLASS;
1801 return 1;
1802 case BLKmode:
1803 return 0;
1804 default:
1805 abort ();
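/* A few concrete cases of the switch above (editor's illustration): a
   DImode `long' gets X86_64_INTEGER_CLASS, an SFmode `float' at offset
   0 gets X86_64_SSESF_CLASS, and a TImode `__int128' takes two
   X86_64_INTEGER_CLASS words.  */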
1809 /* Examine the argument and return the number of registers required in each
1810 class. Return 0 iff the parameter should be passed in memory. */
1811 static int
1812 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1813 enum machine_mode mode;
1814 tree type;
1815 int *int_nregs, *sse_nregs;
1816 int in_return;
1818 enum x86_64_reg_class class[MAX_CLASSES];
1819 int n = classify_argument (mode, type, class, 0);
1821 *int_nregs = 0;
1822 *sse_nregs = 0;
1823 if (!n)
1824 return 0;
1825 for (n--; n >= 0; n--)
1826 switch (class[n])
1828 case X86_64_INTEGER_CLASS:
1829 case X86_64_INTEGERSI_CLASS:
1830 (*int_nregs)++;
1831 break;
1832 case X86_64_SSE_CLASS:
1833 case X86_64_SSESF_CLASS:
1834 case X86_64_SSEDF_CLASS:
1835 (*sse_nregs)++;
1836 break;
1837 case X86_64_NO_CLASS:
1838 case X86_64_SSEUP_CLASS:
1839 break;
1840 case X86_64_X87_CLASS:
1841 case X86_64_X87UP_CLASS:
1842 if (!in_return)
1843 return 0;
1844 break;
1845 case X86_64_MEMORY_CLASS:
1846 abort ();
1848 return 1;
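/* Usage sketch (editor's note): for the struct s { double d; int i; }
   example above, examine_argument sets *int_nregs = 1 and
   *sse_nregs = 1 and returns 1; for a 32-byte structure
   classify_argument yields 0, so examine_argument returns 0 and the
   argument is passed in memory.  */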
1850 /* Construct a container for the argument used by the GCC interface. See
1851 FUNCTION_ARG for the detailed description. */
1852 static rtx
1853 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1854 enum machine_mode mode;
1855 tree type;
1856 int in_return;
1857 int nintregs, nsseregs;
1858 int *intreg, sse_regno;
1860 enum machine_mode tmpmode;
1861 int bytes =
1862 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1863 enum x86_64_reg_class class[MAX_CLASSES];
1864 int n;
1865 int i;
1866 int nexps = 0;
1867 int needed_sseregs, needed_intregs;
1868 rtx exp[MAX_CLASSES];
1869 rtx ret;
1871 n = classify_argument (mode, type, class, 0);
1872 if (TARGET_DEBUG_ARG)
1874 if (!n)
1875 fprintf (stderr, "Memory class\n");
1876 else
1878 fprintf (stderr, "Classes:");
1879 for (i = 0; i < n; i++)
1881 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1883 fprintf (stderr, "\n");
1886 if (!n)
1887 return NULL;
1888 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1889 return NULL;
1890 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1891 return NULL;
1893 /* First construct simple cases. Avoid SCmode, since we want to use a
1894 single register to pass this type. */
1895 if (n == 1 && mode != SCmode)
1896 switch (class[0])
1898 case X86_64_INTEGER_CLASS:
1899 case X86_64_INTEGERSI_CLASS:
1900 return gen_rtx_REG (mode, intreg[0]);
1901 case X86_64_SSE_CLASS:
1902 case X86_64_SSESF_CLASS:
1903 case X86_64_SSEDF_CLASS:
1904 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1905 case X86_64_X87_CLASS:
1906 return gen_rtx_REG (mode, FIRST_STACK_REG);
1907 case X86_64_NO_CLASS:
1908 /* Zero sized array, struct or class. */
1909 return NULL;
1910 default:
1911 abort ();
1913 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1914 return gen_rtx_REG (TImode, SSE_REGNO (sse_regno));
1915 if (n == 2
1916 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1917 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1918 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1919 && class[1] == X86_64_INTEGER_CLASS
1920 && (mode == CDImode || mode == TImode)
1921 && intreg[0] + 1 == intreg[1])
1922 return gen_rtx_REG (mode, intreg[0]);
1923 if (n == 4
1924 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1925 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1926 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1928 /* Otherwise figure out the entries of the PARALLEL. */
1929 for (i = 0; i < n; i++)
1931 switch (class[i])
1933 case X86_64_NO_CLASS:
1934 break;
1935 case X86_64_INTEGER_CLASS:
1936 case X86_64_INTEGERSI_CLASS:
1937 /* Merge TImodes on aligned occasions here too. */
1938 if (i * 8 + 8 > bytes)
1939 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1940 else if (class[i] == X86_64_INTEGERSI_CLASS)
1941 tmpmode = SImode;
1942 else
1943 tmpmode = DImode;
1944 /* We've requested a size (e.g. 24 bytes) we don't have a mode for; use DImode. */
1945 if (tmpmode == BLKmode)
1946 tmpmode = DImode;
1947 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1948 gen_rtx_REG (tmpmode, *intreg),
1949 GEN_INT (i*8));
1950 intreg++;
1951 break;
1952 case X86_64_SSESF_CLASS:
1953 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1954 gen_rtx_REG (SFmode,
1955 SSE_REGNO (sse_regno)),
1956 GEN_INT (i*8));
1957 sse_regno++;
1958 break;
1959 case X86_64_SSEDF_CLASS:
1960 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1961 gen_rtx_REG (DFmode,
1962 SSE_REGNO (sse_regno)),
1963 GEN_INT (i*8));
1964 sse_regno++;
1965 break;
1966 case X86_64_SSE_CLASS:
1967 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
1968 tmpmode = TImode, i++;
1969 else
1970 tmpmode = DImode;
1971 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1972 gen_rtx_REG (tmpmode,
1973 SSE_REGNO (sse_regno)),
1974 GEN_INT (i*8));
1975 sse_regno++;
1976 break;
1977 default:
1978 abort ();
1981 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
1982 for (i = 0; i < nexps; i++)
1983 XVECEXP (ret, 0, i) = exp [i];
1984 return ret;
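/* For the struct s { double d; int i; } example, the PARALLEL built
   above looks roughly like this (editor's illustration):

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   i.e. the double travels in %xmm0 and the integer word in %rdi.  */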
1987 /* Update the data in CUM to advance over an argument
1988 of mode MODE and data type TYPE.
1989 (TYPE is null for libcalls where that information may not be available.) */
1991 void
1992 function_arg_advance (cum, mode, type, named)
1993 CUMULATIVE_ARGS *cum; /* current arg information */
1994 enum machine_mode mode; /* current arg mode */
1995 tree type; /* type of the argument or 0 if lib support */
1996 int named; /* whether or not the argument was named */
1998 int bytes =
1999 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2000 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2002 if (TARGET_DEBUG_ARG)
2003 fprintf (stderr,
2004 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2005 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2006 if (TARGET_64BIT)
2008 int int_nregs, sse_nregs;
2009 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2010 cum->words += words;
2011 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2013 cum->nregs -= int_nregs;
2014 cum->sse_nregs -= sse_nregs;
2015 cum->regno += int_nregs;
2016 cum->sse_regno += sse_nregs;
2018 else
2019 cum->words += words;
2021 else
2023 if (TARGET_SSE && mode == TImode)
2025 cum->sse_words += words;
2026 cum->sse_nregs -= 1;
2027 cum->sse_regno += 1;
2028 if (cum->sse_nregs <= 0)
2030 cum->sse_nregs = 0;
2031 cum->sse_regno = 0;
2034 else
2036 cum->words += words;
2037 cum->nregs -= words;
2038 cum->regno += words;
2040 if (cum->nregs <= 0)
2042 cum->nregs = 0;
2043 cum->regno = 0;
2047 return;
2050 /* Define where to put the arguments to a function.
2051 Value is zero to push the argument on the stack,
2052 or a hard register in which to store the argument.
2054 MODE is the argument's machine mode.
2055 TYPE is the data type of the argument (as a tree).
2056 This is null for libcalls where that information may
2057 not be available.
2058 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2059 the preceding args and about the function being called.
2060 NAMED is nonzero if this argument is a named parameter
2061 (otherwise it is an extra parameter matching an ellipsis). */
2063 struct rtx_def *
2064 function_arg (cum, mode, type, named)
2065 CUMULATIVE_ARGS *cum; /* current arg information */
2066 enum machine_mode mode; /* current arg mode */
2067 tree type; /* type of the argument or 0 if lib support */
2068 int named; /* != 0 for normal args, == 0 for ... args */
2070 rtx ret = NULL_RTX;
2071 int bytes =
2072 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2073 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2075 /* Handle a hidden AL argument containing the number of SSE registers for
2076 varargs x86-64 functions. For the i386 ABI just return constm1_rtx to
2077 avoid any AL settings. */
2078 if (mode == VOIDmode)
2080 if (TARGET_64BIT)
2081 return GEN_INT (cum->maybe_vaarg
2082 ? (cum->sse_nregs < 0
2083 ? SSE_REGPARM_MAX
2084 : cum->sse_regno)
2085 : -1);
2086 else
2087 return constm1_rtx;
2089 if (TARGET_64BIT)
2090 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2091 &x86_64_int_parameter_registers [cum->regno],
2092 cum->sse_regno);
2093 else
2094 switch (mode)
2096 /* For now, pass fp/complex values on the stack. */
2097 default:
2098 break;
2100 case BLKmode:
2101 case DImode:
2102 case SImode:
2103 case HImode:
2104 case QImode:
2105 if (words <= cum->nregs)
2106 ret = gen_rtx_REG (mode, cum->regno);
2107 break;
2108 case TImode:
2109 if (cum->sse_nregs)
2110 ret = gen_rtx_REG (mode, cum->sse_regno);
2111 break;
2114 if (TARGET_DEBUG_ARG)
2116 fprintf (stderr,
2117 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
2118 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2120 if (ret)
2121 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
2122 else
2123 fprintf (stderr, ", stack");
2125 fprintf (stderr, " )\n");
2128 return ret;
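/* Concrete effect of the VOIDmode case above (editor's note): for a
   varargs call such as printf ("%f", 3.14) on x86-64, the hidden
   argument materializes as a `movl $1, %eax' before the call, telling
   the callee that one SSE register carries arguments.  */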
2131 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2132 and type. */
2135 ix86_function_arg_boundary (mode, type)
2136 enum machine_mode mode;
2137 tree type;
2139 int align;
2140 if (!TARGET_64BIT)
2141 return PARM_BOUNDARY;
2142 if (type)
2143 align = TYPE_ALIGN (type);
2144 else
2145 align = GET_MODE_ALIGNMENT (mode);
2146 if (align < PARM_BOUNDARY)
2147 align = PARM_BOUNDARY;
2148 if (align > 128)
2149 align = 128;
2150 return align;
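/* For instance (editor's note, assuming the 64-bit PARM_BOUNDARY of 64
   bits): a double, with 64-bit alignment, yields the 64-bit
   PARM_BOUNDARY, a 16-byte vector type yields 128, and any larger
   requested alignment is capped at 128.  */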
2153 /* Return true if REGNO is a possible register number of a function value. */
2154 bool
2155 ix86_function_value_regno_p (regno)
2156 int regno;
2158 if (!TARGET_64BIT)
2160 return ((regno) == 0
2161 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2162 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2164 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2165 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2166 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2169 /* Define how to find the value returned by a function.
2170 VALTYPE is the data type of the value (as a tree).
2171 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2172 otherwise, FUNC is 0. */
2174 ix86_function_value (valtype)
2175 tree valtype;
2177 if (TARGET_64BIT)
2179 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2180 REGPARM_MAX, SSE_REGPARM_MAX,
2181 x86_64_int_return_registers, 0);
2182 /* For zero sized structures, construct_container returns NULL, but we need
2183 to keep the rest of the compiler happy by returning a meaningful value. */
2184 if (!ret)
2185 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2186 return ret;
2188 else
2189 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2192 /* Return nonzero iff type is returned in memory. */
2194 ix86_return_in_memory (type)
2195 tree type;
2197 int needed_intregs, needed_sseregs;
2198 if (TARGET_64BIT)
2200 return !examine_argument (TYPE_MODE (type), type, 1,
2201 &needed_intregs, &needed_sseregs);
2203 else
2205 if (TYPE_MODE (type) == BLKmode
2206 || (VECTOR_MODE_P (TYPE_MODE (type))
2207 && int_size_in_bytes (type) == 8)
2208 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2209 && TYPE_MODE (type) != TFmode
2210 && !VECTOR_MODE_P (TYPE_MODE (type))))
2211 return 1;
2212 return 0;
2216 /* Define how to find the value returned by a library function
2217 assuming the value has mode MODE. */
2219 ix86_libcall_value (mode)
2220 enum machine_mode mode;
2222 if (TARGET_64BIT)
2224 switch (mode)
2226 case SFmode:
2227 case SCmode:
2228 case DFmode:
2229 case DCmode:
2230 return gen_rtx_REG (mode, FIRST_SSE_REG);
2231 case TFmode:
2232 case TCmode:
2233 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2234 default:
2235 return gen_rtx_REG (mode, 0);
2238 else
2239 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2242 /* Create the va_list data type. */
2244 tree
2245 ix86_build_va_list ()
2247 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2249 /* For i386 we use a plain pointer to the argument area. */
2250 if (!TARGET_64BIT)
2251 return build_pointer_type (char_type_node);
2253 record = make_lang_type (RECORD_TYPE);
2254 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2256 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2257 unsigned_type_node);
2258 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2259 unsigned_type_node);
2260 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2261 ptr_type_node);
2262 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2263 ptr_type_node);
2265 DECL_FIELD_CONTEXT (f_gpr) = record;
2266 DECL_FIELD_CONTEXT (f_fpr) = record;
2267 DECL_FIELD_CONTEXT (f_ovf) = record;
2268 DECL_FIELD_CONTEXT (f_sav) = record;
2270 TREE_CHAIN (record) = type_decl;
2271 TYPE_NAME (record) = type_decl;
2272 TYPE_FIELDS (record) = f_gpr;
2273 TREE_CHAIN (f_gpr) = f_fpr;
2274 TREE_CHAIN (f_fpr) = f_ovf;
2275 TREE_CHAIN (f_ovf) = f_sav;
2277 layout_type (record);
2279 /* The correct type is an array type of one element. */
2280 return build_array_type (record, build_index_type (size_zero_node));
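/* The record built above corresponds to the familiar x86-64 ABI
   va_list layout, roughly equivalent to this C (editor's sketch):

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];  */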
2283 /* Perform any actions needed for a function that is receiving a
2284 variable number of arguments.
2286 CUM is as above.
2288 MODE and TYPE are the mode and type of the current parameter.
2290 PRETEND_SIZE is a variable that should be set to the amount of stack
2291 that must be pushed by the prolog to pretend that our caller pushed it.
2294 Normally, this macro will push all remaining incoming registers on the
2295 stack and set PRETEND_SIZE to the length of the registers pushed. */
2297 void
2298 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2299 CUMULATIVE_ARGS *cum;
2300 enum machine_mode mode;
2301 tree type;
2302 int *pretend_size ATTRIBUTE_UNUSED;
2303 int no_rtl;
2306 CUMULATIVE_ARGS next_cum;
2307 rtx save_area = NULL_RTX, mem;
2308 rtx label;
2309 rtx label_ref;
2310 rtx tmp_reg;
2311 rtx nsse_reg;
2312 int set;
2313 tree fntype;
2314 int stdarg_p;
2315 int i;
2317 if (!TARGET_64BIT)
2318 return;
2320 /* Indicate that space should be allocated on the stack for the varargs save area. */
2321 ix86_save_varrargs_registers = 1;
2323 fntype = TREE_TYPE (current_function_decl);
2324 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2325 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2326 != void_type_node));
2328 /* For varargs, we do not want to skip the dummy va_dcl argument.
2329 For stdargs, we do want to skip the last named argument. */
2330 next_cum = *cum;
2331 if (stdarg_p)
2332 function_arg_advance (&next_cum, mode, type, 1);
2334 if (!no_rtl)
2335 save_area = frame_pointer_rtx;
2337 set = get_varargs_alias_set ();
2339 for (i = next_cum.regno; i < ix86_regparm; i++)
2341 mem = gen_rtx_MEM (Pmode,
2342 plus_constant (save_area, i * UNITS_PER_WORD));
2343 set_mem_alias_set (mem, set);
2344 emit_move_insn (mem, gen_rtx_REG (Pmode,
2345 x86_64_int_parameter_registers[i]));
2348 if (next_cum.sse_nregs)
2350 /* Now emit code to save SSE registers. The AX parameter contains the
2351 number of SSE parameter registers used to call this function. We use
2352 the sse_prologue_save insn template, which produces a computed jump
2353 across the SSE saves. We need some preparation work to get this working. */
2355 label = gen_label_rtx ();
2356 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2358 /* Compute the address to jump to:
2359 label - 4*eax + nnamed_sse_arguments*4 */
2360 tmp_reg = gen_reg_rtx (Pmode);
2361 nsse_reg = gen_reg_rtx (Pmode);
2362 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2363 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2364 gen_rtx_MULT (Pmode, nsse_reg,
2365 GEN_INT (4))));
2366 if (next_cum.sse_regno)
2367 emit_move_insn
2368 (nsse_reg,
2369 gen_rtx_CONST (DImode,
2370 gen_rtx_PLUS (DImode,
2371 label_ref,
2372 GEN_INT (next_cum.sse_regno * 4))));
2373 else
2374 emit_move_insn (nsse_reg, label_ref);
2375 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2377 /* Compute the address of the memory block we save into. We always use a
2378 pointer pointing 127 bytes past the first byte to store - this keeps the
2379 displacement within a signed byte and each instruction within 4 bytes. */
2380 tmp_reg = gen_reg_rtx (Pmode);
2381 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2382 plus_constant (save_area,
2383 8 * REGPARM_MAX + 127)));
2384 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2385 set_mem_alias_set (mem, set);
2386 set_mem_align (mem, BITS_PER_WORD);
2388 /* And finally do the dirty job! */
2389 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2390 GEN_INT (next_cum.sse_regno), label));
2395 /* Implement va_start. */
2397 void
2398 ix86_va_start (stdarg_p, valist, nextarg)
2399 int stdarg_p;
2400 tree valist;
2401 rtx nextarg;
2403 HOST_WIDE_INT words, n_gpr, n_fpr;
2404 tree f_gpr, f_fpr, f_ovf, f_sav;
2405 tree gpr, fpr, ovf, sav, t;
2407 /* Only the 64-bit target needs anything special. */
2408 if (!TARGET_64BIT)
2410 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2411 return;
2414 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2415 f_fpr = TREE_CHAIN (f_gpr);
2416 f_ovf = TREE_CHAIN (f_fpr);
2417 f_sav = TREE_CHAIN (f_ovf);
2419 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2420 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2421 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2422 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2423 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2425 /* Count number of gp and fp argument registers used. */
2426 words = current_function_args_info.words;
2427 n_gpr = current_function_args_info.regno;
2428 n_fpr = current_function_args_info.sse_regno;
2430 if (TARGET_DEBUG_ARG)
2431 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2432 (int)words, (int)n_gpr, (int)n_fpr);
2434 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2435 build_int_2 (n_gpr * 8, 0));
2436 TREE_SIDE_EFFECTS (t) = 1;
2437 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2439 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2440 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2441 TREE_SIDE_EFFECTS (t) = 1;
2442 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2444 /* Find the overflow area. */
2445 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2446 if (words != 0)
2447 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2448 build_int_2 (words * UNITS_PER_WORD, 0));
2449 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2450 TREE_SIDE_EFFECTS (t) = 1;
2451 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2453 /* Find the register save area.
2454 The function prologue saves it right above the stack frame. */
2455 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2456 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2457 TREE_SIDE_EFFECTS (t) = 1;
2458 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
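/* Example of the values stored above (editor's note): in a function
   declared f (int a, ...), one general register is consumed by the
   named argument, so va_start leaves gp_offset == 8 and
   fp_offset == 8 * REGPARM_MAX (48, since no SSE register was
   consumed); the SSE save slots start after the six 8-byte general
   register slots.  */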
2461 /* Implement va_arg. */
2463 ix86_va_arg (valist, type)
2464 tree valist, type;
2466 static int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2467 tree f_gpr, f_fpr, f_ovf, f_sav;
2468 tree gpr, fpr, ovf, sav, t;
2469 int size, rsize;
2470 rtx lab_false, lab_over = NULL_RTX;
2471 rtx addr_rtx, r;
2472 rtx container;
2474 /* Only the 64-bit target needs anything special. */
2475 if (!TARGET_64BIT)
2477 return std_expand_builtin_va_arg (valist, type);
2480 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2481 f_fpr = TREE_CHAIN (f_gpr);
2482 f_ovf = TREE_CHAIN (f_fpr);
2483 f_sav = TREE_CHAIN (f_ovf);
2485 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2486 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2487 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2488 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2489 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2491 size = int_size_in_bytes (type);
2492 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2494 container = construct_container (TYPE_MODE (type), type, 0,
2495 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2497 /* Pull the value out of the saved registers ... */
2500 addr_rtx = gen_reg_rtx (Pmode);
2502 if (container)
2504 rtx int_addr_rtx, sse_addr_rtx;
2505 int needed_intregs, needed_sseregs;
2506 int need_temp;
2508 lab_over = gen_label_rtx ();
2509 lab_false = gen_label_rtx ();
2511 examine_argument (TYPE_MODE (type), type, 0,
2512 &needed_intregs, &needed_sseregs);
2515 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2516 || TYPE_ALIGN (type) > 128);
2518 /* In case we are passing a structure, verify that it is a consecutive
2519 block in the register save area. If not, we need to do moves. */
2520 if (!need_temp && !REG_P (container))
2522 /* Verify that all registers are strictly consecutive. */
2523 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2525 int i;
2527 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2529 rtx slot = XVECEXP (container, 0, i);
2530 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int)i
2531 || INTVAL (XEXP (slot, 1)) != i * 16)
2532 need_temp = 1;
2535 else
2537 int i;
2539 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2541 rtx slot = XVECEXP (container, 0, i);
2542 if (REGNO (XEXP (slot, 0)) != (unsigned int)i
2543 || INTVAL (XEXP (slot, 1)) != i * 8)
2544 need_temp = 1;
2548 if (!need_temp)
2550 int_addr_rtx = addr_rtx;
2551 sse_addr_rtx = addr_rtx;
2553 else
2555 int_addr_rtx = gen_reg_rtx (Pmode);
2556 sse_addr_rtx = gen_reg_rtx (Pmode);
2558 /* First ensure that we fit completely in registers. */
2559 if (needed_intregs)
2561 emit_cmp_and_jump_insns (expand_expr
2562 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2563 GEN_INT ((REGPARM_MAX - needed_intregs +
2564 1) * 8), GE, const1_rtx, SImode,
2565 1, lab_false);
2567 if (needed_sseregs)
2569 emit_cmp_and_jump_insns (expand_expr
2570 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2571 GEN_INT ((SSE_REGPARM_MAX -
2572 needed_sseregs + 1) * 16 +
2573 REGPARM_MAX * 8), GE, const1_rtx,
2574 SImode, 1, lab_false);
2577 /* Compute index to start of area used for integer regs. */
2578 if (needed_intregs)
2580 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2581 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2582 if (r != int_addr_rtx)
2583 emit_move_insn (int_addr_rtx, r);
2585 if (needed_sseregs)
2587 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2588 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2589 if (r != sse_addr_rtx)
2590 emit_move_insn (sse_addr_rtx, r);
2592 if (need_temp)
2594 int i;
2595 rtx mem;
2597 /* Never use the memory itself, as it has the alias set. */
2598 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2599 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2600 set_mem_alias_set (mem, get_varargs_alias_set ());
2601 set_mem_align (mem, BITS_PER_UNIT);
2603 for (i = 0; i < XVECLEN (container, 0); i++)
2605 rtx slot = XVECEXP (container, 0, i);
2606 rtx reg = XEXP (slot, 0);
2607 enum machine_mode mode = GET_MODE (reg);
2608 rtx src_addr;
2609 rtx src_mem;
2610 int src_offset;
2611 rtx dest_mem;
2613 if (SSE_REGNO_P (REGNO (reg)))
2615 src_addr = sse_addr_rtx;
2616 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2618 else
2620 src_addr = int_addr_rtx;
2621 src_offset = REGNO (reg) * 8;
2623 src_mem = gen_rtx_MEM (mode, src_addr);
2624 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2625 src_mem = adjust_address (src_mem, mode, src_offset);
2626 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2627 emit_move_insn (dest_mem, src_mem);
2631 if (needed_intregs)
2634 t = build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2635 build_int_2 (needed_intregs * 8, 0));
2636 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2637 TREE_SIDE_EFFECTS (t) = 1;
2638 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2640 if (needed_sseregs)
2643 t = build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2644 build_int_2 (needed_sseregs * 16, 0));
2645 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2646 TREE_SIDE_EFFECTS (t) = 1;
2647 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2650 emit_jump_insn (gen_jump (lab_over));
2651 emit_barrier ();
2652 emit_label (lab_false);
2655 /* ... otherwise out of the overflow area. */
2657 /* Care for on-stack alignment if needed. */
2658 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2659 t = ovf;
2660 else
2662 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2663 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2664 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2666 t = save_expr (t);
2668 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2669 if (r != addr_rtx)
2670 emit_move_insn (addr_rtx, r);
2673 t = build (PLUS_EXPR, TREE_TYPE (t), t,
2674 build_int_2 (rsize * UNITS_PER_WORD, 0));
2675 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2676 TREE_SIDE_EFFECTS (t) = 1;
2677 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2679 if (container)
2680 emit_label (lab_over);
2682 return addr_rtx;
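/* The code generated above amounts to the following pseudo-C for a
   single integer argument (editor's sketch; REGPARM_MAX == 6, and the
   two labels correspond to lab_false and lab_over):

     if (gp_offset >= 6 * 8)
       goto overflow;
     addr = reg_save_area + gp_offset;
     gp_offset += 8;
     goto done;
   overflow:
     addr = align (overflow_arg_area);
     overflow_arg_area = addr + round_up (size, 8);
   done:
     return addr;  */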
2685 /* Return nonzero if OP is a general operand representable on x86_64. */
2688 x86_64_general_operand (op, mode)
2689 rtx op;
2690 enum machine_mode mode;
2692 if (!TARGET_64BIT)
2693 return general_operand (op, mode);
2694 if (nonimmediate_operand (op, mode))
2695 return 1;
2696 return x86_64_sign_extended_value (op);
2699 /* Return nonzero if OP is a general operand representable on x86_64
2700 as either a sign extended or zero extended constant. */
2703 x86_64_szext_general_operand (op, mode)
2704 rtx op;
2705 enum machine_mode mode;
2707 if (!TARGET_64BIT)
2708 return general_operand (op, mode);
2709 if (nonimmediate_operand (op, mode))
2710 return 1;
2711 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2714 /* Return nonzero if OP is a nonmemory operand representable on x86_64. */
2717 x86_64_nonmemory_operand (op, mode)
2718 rtx op;
2719 enum machine_mode mode;
2721 if (!TARGET_64BIT)
2722 return nonmemory_operand (op, mode);
2723 if (register_operand (op, mode))
2724 return 1;
2725 return x86_64_sign_extended_value (op);
2728 /* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns. */
2731 x86_64_movabs_operand (op, mode)
2732 rtx op;
2733 enum machine_mode mode;
2735 if (!TARGET_64BIT || !flag_pic)
2736 return nonmemory_operand (op, mode);
2737 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2738 return 1;
2739 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2740 return 1;
2741 return 0;
2744 /* Return nonzero if OP is a nonmemory operand representable on x86_64 as either a sign extended or zero extended constant. */
2747 x86_64_szext_nonmemory_operand (op, mode)
2748 rtx op;
2749 enum machine_mode mode;
2751 if (!TARGET_64BIT)
2752 return nonmemory_operand (op, mode);
2753 if (register_operand (op, mode))
2754 return 1;
2755 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2758 /* Return nonzero if OP is an immediate operand representable on x86_64. */
2761 x86_64_immediate_operand (op, mode)
2762 rtx op;
2763 enum machine_mode mode;
2765 if (!TARGET_64BIT)
2766 return immediate_operand (op, mode);
2767 return x86_64_sign_extended_value (op);
2770 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero extended value. */
2773 x86_64_zext_immediate_operand (op, mode)
2774 rtx op;
2775 enum machine_mode mode ATTRIBUTE_UNUSED;
2777 return x86_64_zero_extended_value (op);
2780 /* Return nonzero if OP is (const_int 1), else return zero. */
2783 const_int_1_operand (op, mode)
2784 rtx op;
2785 enum machine_mode mode ATTRIBUTE_UNUSED;
2787 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2790 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2791 reference and a constant. */
2794 symbolic_operand (op, mode)
2795 register rtx op;
2796 enum machine_mode mode ATTRIBUTE_UNUSED;
2798 switch (GET_CODE (op))
2800 case SYMBOL_REF:
2801 case LABEL_REF:
2802 return 1;
2804 case CONST:
2805 op = XEXP (op, 0);
2806 if (GET_CODE (op) == SYMBOL_REF
2807 || GET_CODE (op) == LABEL_REF
2808 || (GET_CODE (op) == UNSPEC
2809 && (XINT (op, 1) == 6
2810 || XINT (op, 1) == 7
2811 || XINT (op, 1) == 15)))
2812 return 1;
2813 if (GET_CODE (op) != PLUS
2814 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2815 return 0;
2817 op = XEXP (op, 0);
2818 if (GET_CODE (op) == SYMBOL_REF
2819 || GET_CODE (op) == LABEL_REF)
2820 return 1;
2821 /* Only @GOTOFF gets offsets. */
2822 if (GET_CODE (op) != UNSPEC
2823 || XINT (op, 1) != 7)
2824 return 0;
2826 op = XVECEXP (op, 0, 0);
2827 if (GET_CODE (op) == SYMBOL_REF
2828 || GET_CODE (op) == LABEL_REF)
2829 return 1;
2830 return 0;
2832 default:
2833 return 0;
2837 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2840 pic_symbolic_operand (op, mode)
2841 register rtx op;
2842 enum machine_mode mode ATTRIBUTE_UNUSED;
2844 if (GET_CODE (op) != CONST)
2845 return 0;
2846 op = XEXP (op, 0);
2847 if (TARGET_64BIT)
2849 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2850 return 1;
2852 else
2854 if (GET_CODE (op) == UNSPEC)
2855 return 1;
2856 if (GET_CODE (op) != PLUS
2857 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2858 return 0;
2859 op = XEXP (op, 0);
2860 if (GET_CODE (op) == UNSPEC)
2861 return 1;
2863 return 0;
2866 /* Return true if OP is a symbolic operand that resolves locally. */
2868 static int
2869 local_symbolic_operand (op, mode)
2870 rtx op;
2871 enum machine_mode mode ATTRIBUTE_UNUSED;
2873 if (GET_CODE (op) == LABEL_REF)
2874 return 1;
2876 if (GET_CODE (op) == CONST
2877 && GET_CODE (XEXP (op, 0)) == PLUS
2878 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2879 op = XEXP (XEXP (op, 0), 0);
2881 if (GET_CODE (op) != SYMBOL_REF)
2882 return 0;
2884 /* These we've been told are local by varasm and encode_section_info
2885 respectively. */
2886 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2887 return 1;
2889 /* There is, however, a not insubstantial body of code in the rest of
2890 the compiler that assumes it can just stick the results of
2891 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2892 /* ??? This is a hack. Should update the body of the compiler to
2893 always create a DECL and invoke ENCODE_SECTION_INFO. */
2894 if (strncmp (XSTR (op, 0), internal_label_prefix,
2895 internal_label_prefix_len) == 0)
2896 return 1;
2898 return 0;
2901 /* Test for a valid operand for a call instruction. Don't allow the
2902 arg pointer register or virtual regs since they may decay into
2903 reg + const, which the patterns can't handle. */
2906 call_insn_operand (op, mode)
2907 rtx op;
2908 enum machine_mode mode ATTRIBUTE_UNUSED;
2910 /* Disallow indirect through a virtual register. This leads to
2911 compiler aborts when trying to eliminate them. */
2912 if (GET_CODE (op) == REG
2913 && (op == arg_pointer_rtx
2914 || op == frame_pointer_rtx
2915 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
2916 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
2917 return 0;
2919 /* Disallow `call 1234'. Due to varying assembler lameness this
2920 gets either rejected or translated to `call .+1234'. */
2921 if (GET_CODE (op) == CONST_INT)
2922 return 0;
2924 /* Explicitly allow SYMBOL_REF even if pic. */
2925 if (GET_CODE (op) == SYMBOL_REF)
2926 return 1;
2928 /* Half-pic doesn't allow anything but registers and constants.
2929 We've just taken care of the latter. */
2930 if (HALF_PIC_P ())
2931 return register_operand (op, Pmode);
2933 /* Otherwise we can allow any general_operand in the address. */
2934 return general_operand (op, Pmode);
2938 constant_call_address_operand (op, mode)
2939 rtx op;
2940 enum machine_mode mode ATTRIBUTE_UNUSED;
2942 if (GET_CODE (op) == CONST
2943 && GET_CODE (XEXP (op, 0)) == PLUS
2944 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2945 op = XEXP (XEXP (op, 0), 0);
2946 return GET_CODE (op) == SYMBOL_REF;
2949 /* Match exactly zero and one. */
2952 const0_operand (op, mode)
2953 register rtx op;
2954 enum machine_mode mode;
2956 return op == CONST0_RTX (mode);
2960 const1_operand (op, mode)
2961 register rtx op;
2962 enum machine_mode mode ATTRIBUTE_UNUSED;
2964 return op == const1_rtx;
2967 /* Match 2, 4, or 8. Used for leal multiplicands. */
2970 const248_operand (op, mode)
2971 register rtx op;
2972 enum machine_mode mode ATTRIBUTE_UNUSED;
2974 return (GET_CODE (op) == CONST_INT
2975 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
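/* These are exactly the scale factors the i386 addressing modes
   support; e.g. (editor's illustration) `leal (%eax,%ebx,4), %ecx'
   computes ecx = eax + 4*ebx in a single instruction.  */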
2978 /* True if this is a constant appropriate for an increment or decrement. */
2981 incdec_operand (op, mode)
2982 register rtx op;
2983 enum machine_mode mode ATTRIBUTE_UNUSED;
2985 /* On Pentium4, the inc and dec operations cause an extra dependency on
2986 the flags register, since the carry flag is not set. */
2987 if (TARGET_PENTIUM4 && !optimize_size)
2988 return 0;
2989 return op == const1_rtx || op == constm1_rtx;
2992 /* Return nonzero if OP is acceptable as an operand of the DImode shift
2993 expander. */
2996 shiftdi_operand (op, mode)
2997 rtx op;
2998 enum machine_mode mode ATTRIBUTE_UNUSED;
3000 if (TARGET_64BIT)
3001 return nonimmediate_operand (op, mode);
3002 else
3003 return register_operand (op, mode);
3006 /* Return false if this is the stack pointer, or any other fake
3007 register eliminable to the stack pointer. Otherwise, this is
3008 a register operand.
3010 This is used to prevent esp from being used as an index reg,
3011 which would only happen in pathological cases. */
3014 reg_no_sp_operand (op, mode)
3015 register rtx op;
3016 enum machine_mode mode;
3018 rtx t = op;
3019 if (GET_CODE (t) == SUBREG)
3020 t = SUBREG_REG (t);
3021 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3022 return 0;
3024 return register_operand (op, mode);
3028 mmx_reg_operand (op, mode)
3029 register rtx op;
3030 enum machine_mode mode ATTRIBUTE_UNUSED;
3032 return MMX_REG_P (op);
3035 /* Return false if this is any eliminable register. Otherwise
3036 general_operand. */
3039 general_no_elim_operand (op, mode)
3040 register rtx op;
3041 enum machine_mode mode;
3043 rtx t = op;
3044 if (GET_CODE (t) == SUBREG)
3045 t = SUBREG_REG (t);
3046 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3047 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3048 || t == virtual_stack_dynamic_rtx)
3049 return 0;
3050 if (REG_P (t)
3051 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3052 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3053 return 0;
3055 return general_operand (op, mode);
3058 /* Return false if this is any eliminable register. Otherwise
3059 register_operand or const_int. */
3062 nonmemory_no_elim_operand (op, mode)
3063 register rtx op;
3064 enum machine_mode mode;
3066 rtx t = op;
3067 if (GET_CODE (t) == SUBREG)
3068 t = SUBREG_REG (t);
3069 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3070 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3071 || t == virtual_stack_dynamic_rtx)
3072 return 0;
3074 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3077 /* Return true if op is a Q_REGS class register. */
3080 q_regs_operand (op, mode)
3081 register rtx op;
3082 enum machine_mode mode;
3084 if (mode != VOIDmode && GET_MODE (op) != mode)
3085 return 0;
3086 if (GET_CODE (op) == SUBREG)
3087 op = SUBREG_REG (op);
3088 return QI_REG_P (op);
3091 /* Return true if op is a NON_Q_REGS class register. */
3094 non_q_regs_operand (op, mode)
3095 register rtx op;
3096 enum machine_mode mode;
3098 if (mode != VOIDmode && GET_MODE (op) != mode)
3099 return 0;
3100 if (GET_CODE (op) == SUBREG)
3101 op = SUBREG_REG (op);
3102 return NON_QI_REG_P (op);
3105 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3106 insns. */
3108 sse_comparison_operator (op, mode)
3109 rtx op;
3110 enum machine_mode mode ATTRIBUTE_UNUSED;
3112 enum rtx_code code = GET_CODE (op);
3113 switch (code)
3115 /* Operations supported directly. */
3116 case EQ:
3117 case LT:
3118 case LE:
3119 case UNORDERED:
3120 case NE:
3121 case UNGE:
3122 case UNGT:
3123 case ORDERED:
3124 return 1;
3125 /* These are equivalent to ones above in non-IEEE comparisons. */
3126 case UNEQ:
3127 case UNLT:
3128 case UNLE:
3129 case LTGT:
3130 case GE:
3131 case GT:
3132 return !TARGET_IEEE_FP;
3133 default:
3134 return 0;
3137 /* Return 1 if OP is a valid comparison operator in valid mode. */
3139 ix86_comparison_operator (op, mode)
3140 register rtx op;
3141 enum machine_mode mode;
3143 enum machine_mode inmode;
3144 enum rtx_code code = GET_CODE (op);
3145 if (mode != VOIDmode && GET_MODE (op) != mode)
3146 return 0;
3147 if (GET_RTX_CLASS (code) != '<')
3148 return 0;
3149 inmode = GET_MODE (XEXP (op, 0));
3151 if (inmode == CCFPmode || inmode == CCFPUmode)
3153 enum rtx_code second_code, bypass_code;
3154 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3155 return (bypass_code == NIL && second_code == NIL);
3157 switch (code)
3159 case EQ: case NE:
3160 return 1;
3161 case LT: case GE:
3162 if (inmode == CCmode || inmode == CCGCmode
3163 || inmode == CCGOCmode || inmode == CCNOmode)
3164 return 1;
3165 return 0;
3166 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3167 if (inmode == CCmode)
3168 return 1;
3169 return 0;
3170 case GT: case LE:
3171 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3172 return 1;
3173 return 0;
3174 default:
3175 return 0;
3179 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3182 fcmov_comparison_operator (op, mode)
3183 register rtx op;
3184 enum machine_mode mode;
3186 enum machine_mode inmode;
3187 enum rtx_code code = GET_CODE (op);
3188 if (mode != VOIDmode && GET_MODE (op) != mode)
3189 return 0;
3190 if (GET_RTX_CLASS (code) != '<')
3191 return 0;
3192 inmode = GET_MODE (XEXP (op, 0));
3193 if (inmode == CCFPmode || inmode == CCFPUmode)
3195 enum rtx_code second_code, bypass_code;
3196 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3197 if (bypass_code != NIL || second_code != NIL)
3198 return 0;
3199 code = ix86_fp_compare_code_to_integer (code);
3201 /* The i387 supports only a limited set of condition codes. */
3202 switch (code)
3204 case LTU: case GTU: case LEU: case GEU:
3205 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3206 return 1;
3207 return 0;
3208 case ORDERED: case UNORDERED:
3209 case EQ: case NE:
3210 return 1;
3211 default:
3212 return 0;
3216 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3219 promotable_binary_operator (op, mode)
3220 register rtx op;
3221 enum machine_mode mode ATTRIBUTE_UNUSED;
3223 switch (GET_CODE (op))
3225 case MULT:
3226 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3227 but the 386 and 486 do HImode multiplies faster. */
3228 return ix86_cpu > PROCESSOR_I486;
3229 case PLUS:
3230 case AND:
3231 case IOR:
3232 case XOR:
3233 case ASHIFT:
3234 return 1;
3235 default:
3236 return 0;
3240 /* Nearly general operand, but accept any const_double, since we wish
3241 to be able to drop them into memory rather than have them get pulled
3242 into registers. */
3245 cmp_fp_expander_operand (op, mode)
3246 register rtx op;
3247 enum machine_mode mode;
3249 if (mode != VOIDmode && mode != GET_MODE (op))
3250 return 0;
3251 if (GET_CODE (op) == CONST_DOUBLE)
3252 return 1;
3253 return general_operand (op, mode);
3256 /* Match an SI or HImode register for a zero_extract. */
3259 ext_register_operand (op, mode)
3260 register rtx op;
3261 enum machine_mode mode ATTRIBUTE_UNUSED;
3263 int regno;
3264 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3265 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3266 return 0;
3268 if (!register_operand (op, VOIDmode))
3269 return 0;
3271 /* Be careful to accept only registers having upper parts. */
3272 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3273 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3276 /* Return 1 if this is a valid binary floating-point operation.
3277 OP is the expression matched, and MODE is its mode. */
3280 binary_fp_operator (op, mode)
3281 register rtx op;
3282 enum machine_mode mode;
3284 if (mode != VOIDmode && mode != GET_MODE (op))
3285 return 0;
3287 switch (GET_CODE (op))
3289 case PLUS:
3290 case MINUS:
3291 case MULT:
3292 case DIV:
3293 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3295 default:
3296 return 0;
3301 mult_operator(op, mode)
3302 register rtx op;
3303 enum machine_mode mode ATTRIBUTE_UNUSED;
3305 return GET_CODE (op) == MULT;
3309 div_operator(op, mode)
3310 register rtx op;
3311 enum machine_mode mode ATTRIBUTE_UNUSED;
3313 return GET_CODE (op) == DIV;
3317 arith_or_logical_operator (op, mode)
3318 rtx op;
3319 enum machine_mode mode;
3321 return ((mode == VOIDmode || GET_MODE (op) == mode)
3322 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3323 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3326 /* Returns 1 if OP is a memory operand with a displacement. */
3329 memory_displacement_operand (op, mode)
3330 register rtx op;
3331 enum machine_mode mode;
3333 struct ix86_address parts;
3335 if (! memory_operand (op, mode))
3336 return 0;
3338 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3339 abort ();
3341 return parts.disp != NULL_RTX;
3344 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3345 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3347 ??? It seems likely that this will only work because cmpsi is an
3348 expander, and no actual insns use this. */
3351 cmpsi_operand (op, mode)
3352 rtx op;
3353 enum machine_mode mode;
3355 if (nonimmediate_operand (op, mode))
3356 return 1;
3358 if (GET_CODE (op) == AND
3359 && GET_MODE (op) == SImode
3360 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3361 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3362 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3363 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3364 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3365 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3366 return 1;
3368 return 0;
3371 /* Returns 1 if OP is a memory operand that cannot be represented by the
3372 modRM array. */
3375 long_memory_operand (op, mode)
3376 register rtx op;
3377 enum machine_mode mode;
3379 if (! memory_operand (op, mode))
3380 return 0;
3382 return memory_address_length (op) != 0;
3385 /* Return nonzero if the rtx is known to be aligned. */
3388 aligned_operand (op, mode)
3389 rtx op;
3390 enum machine_mode mode;
3392 struct ix86_address parts;
3394 if (!general_operand (op, mode))
3395 return 0;
3397 /* Registers and immediate operands are always "aligned". */
3398 if (GET_CODE (op) != MEM)
3399 return 1;
3401 /* Don't even try to do any aligned optimizations with volatiles. */
3402 if (MEM_VOLATILE_P (op))
3403 return 0;
3405 op = XEXP (op, 0);
3407 /* Pushes and pops are only valid on the stack pointer. */
3408 if (GET_CODE (op) == PRE_DEC
3409 || GET_CODE (op) == POST_INC)
3410 return 1;
3412 /* Decode the address. */
3413 if (! ix86_decompose_address (op, &parts))
3414 abort ();
3416 /* Look for some component that isn't known to be aligned. */
3417 if (parts.index)
3419 if (parts.scale < 4
3420 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3421 return 0;
3423 if (parts.base)
3425 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3426 return 0;
3428 if (parts.disp)
3430 if (GET_CODE (parts.disp) != CONST_INT
3431 || (INTVAL (parts.disp) & 3) != 0)
3432 return 0;
3435 /* Didn't find one -- this must be an aligned address. */
3436 return 1;
3439 /* Return true if the constant is something that can be loaded with
3440 a special instruction. Only handle 0.0 and 1.0; others are less
3441 worthwhile. */
3444 standard_80387_constant_p (x)
3445 rtx x;
3447 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3448 return -1;
3449 /* Note that the 80387 supports other constants, such as pi, that we
3450 should support too. On some machines these are much slower to load as a
3451 standard constant than to load from doubles in memory. */
3452 if (x == CONST0_RTX (GET_MODE (x)))
3453 return 1;
3454 if (x == CONST1_RTX (GET_MODE (x)))
3455 return 2;
3456 return 0;
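/* The return values map onto i387 load instructions (editor's note):
   1 corresponds to `fldz' (push +0.0) and 2 to `fld1' (push +1.0);
   -1 and 0 mean no special instruction applies.  */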
3459 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
3462 standard_sse_constant_p (x)
3463 rtx x;
3465 if (GET_CODE (x) != CONST_DOUBLE)
3466 return -1;
3467 return (x == CONST0_RTX (GET_MODE (x)));
3470 /* Returns 1 if OP contains a symbol reference. */
3473 symbolic_reference_mentioned_p (op)
3474 rtx op;
3476 register const char *fmt;
3477 register int i;
3479 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3480 return 1;
3482 fmt = GET_RTX_FORMAT (GET_CODE (op));
3483 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3485 if (fmt[i] == 'E')
3487 register int j;
3489 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3490 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3491 return 1;
3494 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3495 return 1;
3498 return 0;
3501 /* Return 1 if it is appropriate to emit `ret' instructions in the
3502 body of a function. Do this only if the epilogue is simple, needing a
3503 couple of insns. Prior to reloading, we can't tell how many registers
3504 must be saved, so return 0 then. Return 0 if there is no frame
3505 marker to de-allocate.
3507 If NON_SAVING_SETJMP is defined and true, then it is not possible
3508 for the epilogue to be simple, so return 0. This is a special case
3509 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3510 until final, but jump_optimize may need to know sooner if a
3511 `return' is OK. */
3514 ix86_can_use_return_insn_p ()
3516 struct ix86_frame frame;
3518 #ifdef NON_SAVING_SETJMP
3519 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3520 return 0;
3521 #endif
3523 if (! reload_completed || frame_pointer_needed)
3524 return 0;
3526 /* Don't allow more than 32k pop, since that's all we can do
3527 with one instruction. */
3528 if (current_function_pops_args
3529 && current_function_args_size >= 32768)
3530 return 0;
3532 ix86_compute_frame_layout (&frame);
3533 return frame.to_allocate == 0 && frame.nregs == 0;
3536 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3538 x86_64_sign_extended_value (value)
3539 rtx value;
3541 switch (GET_CODE (value))
3543 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3544 to be at least 32 and thus all acceptable constants are
3545 represented as CONST_INT. */
3546 case CONST_INT:
3547 if (HOST_BITS_PER_WIDE_INT == 32)
3548 return 1;
3549 else
3551 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3552 return trunc_int_for_mode (val, SImode) == val;
3554 break;
3556 /* For certain code models, the symbolic references are known to fit. */
3557 case SYMBOL_REF:
3558 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3560 /* For certain code models, the code is near as well. */
3561 case LABEL_REF:
3562 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3564 /* We may also accept offsetted memory references in certain special
3565 cases. */
3566 case CONST:
3567 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3568 && XVECLEN (XEXP (value, 0), 0) == 1
3569 && XINT (XEXP (value, 0), 1) == 15)
3570 return 1;
3571 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3573 rtx op1 = XEXP (XEXP (value, 0), 0);
3574 rtx op2 = XEXP (XEXP (value, 0), 1);
3575 HOST_WIDE_INT offset;
3577 if (ix86_cmodel == CM_LARGE)
3578 return 0;
3579 if (GET_CODE (op2) != CONST_INT)
3580 return 0;
3581 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3582 switch (GET_CODE (op1))
3584 case SYMBOL_REF:
3585 /* For CM_SMALL assume that the last object is 1MB before the
3586 end of the 31-bit boundary. We may also accept pretty
3587 large negative constants, knowing that all objects are
3588 in the positive half of the address space. */
3589 if (ix86_cmodel == CM_SMALL
3590 && offset < 1024*1024*1024
3591 && trunc_int_for_mode (offset, SImode) == offset)
3592 return 1;
3593 /* For CM_KERNEL we know that all objects reside in the
3594 negative half of the 32-bit address space. We may not
3595 accept negative offsets, since they may land just outside
3596 that range, but we may accept pretty large positive ones. */
3597 if (ix86_cmodel == CM_KERNEL
3598 && offset > 0
3599 && trunc_int_for_mode (offset, SImode) == offset)
3600 return 1;
3601 break;
3602 case LABEL_REF:
3603 /* These conditions are similar to SYMBOL_REF ones, just the
3604 constraints for code models differ. */
3605 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3606 && offset < 1024*1024*1024
3607 && trunc_int_for_mode (offset, SImode) == offset)
3608 return 1;
3609 if (ix86_cmodel == CM_KERNEL
3610 && offset > 0
3611 && trunc_int_for_mode (offset, SImode) == offset)
3612 return 1;
3613 break;
3614 default:
3615 return 0;
3618 return 0;
3619 default:
3620 return 0;
3624 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3626 x86_64_zero_extended_value (value)
3627 rtx value;
3629 switch (GET_CODE (value))
3631 case CONST_DOUBLE:
3632 if (HOST_BITS_PER_WIDE_INT == 32)
3633 return (GET_MODE (value) == VOIDmode
3634 && !CONST_DOUBLE_HIGH (value));
3635 else
3636 return 0;
3637 case CONST_INT:
3638 if (HOST_BITS_PER_WIDE_INT == 32)
3639 return INTVAL (value) >= 0;
3640 else
3641 return !(INTVAL (value) & ~(HOST_WIDE_INT)0xffffffff);
3642 break;
3644 /* For certain code models, the symbolic references are known to fit. */
3645 case SYMBOL_REF:
3646 return ix86_cmodel == CM_SMALL;
3648 /* For certain code models, the code is near as well. */
3649 case LABEL_REF:
3650 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3652 /* We may also accept offsetted memory references in certain special
3653 cases. */
3654 case CONST:
3655 if (GET_CODE (XEXP (value, 0)) == PLUS)
3657 rtx op1 = XEXP (XEXP (value, 0), 0);
3658 rtx op2 = XEXP (XEXP (value, 0), 1);
3660 if (ix86_cmodel == CM_LARGE)
3661 return 0;
3662 switch (GET_CODE (op1))
3664 case SYMBOL_REF:
3666 /* For small code model we may accept pretty large positive
3667 offsets, since one bit is available for free. Negative
3668 offsets are limited by the size of NULL pointer area
3669 specified by the ABI. */
3670 if (ix86_cmodel == CM_SMALL
3671 && GET_CODE (op2) == CONST_INT
3672 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3673 && (trunc_int_for_mode (INTVAL (op2), SImode)
3674 == INTVAL (op2)))
3675 return 1;
3676 /* ??? For the kernel, we may accept adjustment of
3677 -0x10000000, since we know that it will just convert
3678 negative address space to positive, but perhaps this
3679 is not worthwhile. */
3680 break;
3681 case LABEL_REF:
3682 /* These conditions are similar to SYMBOL_REF ones, just the
3683 constraints for code models differ. */
3684 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3685 && GET_CODE (op2) == CONST_INT
3686 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3687 && (trunc_int_for_mode (INTVAL (op2), SImode)
3688 == INTVAL (op2)))
3689 return 1;
3690 break;
3691 default:
3692 return 0;
3695 return 0;
3696 default:
3697 return 0;
3701 /* Value should be nonzero if functions must have frame pointers.
3702 Zero means the frame pointer need not be set up (and parms may
3703 be accessed via the stack pointer) in functions that seem suitable. */
3706 ix86_frame_pointer_required ()
3708 /* If we accessed previous frames, then the generated code expects
3709 to be able to access the saved ebp value in our frame. */
3710 if (cfun->machine->accesses_prev_frame)
3711 return 1;
3713 /* Several x86 OSes need a frame pointer for other reasons,
3714 usually pertaining to setjmp. */
3715 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3716 return 1;
3718 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3719 the frame pointer by default. Turn it back on now if we've not
3720 got a leaf function. */
3721 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3722 return 1;
3724 return 0;
3727 /* Record that the current function accesses previous call frames. */
3729 void
3730 ix86_setup_frame_addresses ()
3732 cfun->machine->accesses_prev_frame = 1;
3735 static char pic_label_name[32];
3737 /* This function generates code for -fpic that loads %ebx with
3738 the return address of the caller and then returns. */
3740 void
3741 ix86_asm_file_end (file)
3742 FILE *file;
3744 rtx xops[2];
3746 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
3747 return;
3749 /* ??? Binutils 2.10 and earlier have a linkonce elimination bug related
3750 to updating relocations against a section being discarded, such that
3751 this doesn't work. We ought to detect this at configure time. */
3752 #if 0
3753 /* The trick here is to create a linkonce section containing the
3754 pic label thunk, but to refer to it with an internal label.
3755 Because the label is internal, we don't have inter-dso name
3756 binding issues on hosts that don't support ".hidden".
3758 In order to use these macros, however, we must create a fake
3759 function decl. */
3760 if (targetm.have_named_sections)
3762 tree decl = build_decl (FUNCTION_DECL,
3763 get_identifier ("i686.get_pc_thunk"),
3764 error_mark_node);
3765 DECL_ONE_ONLY (decl) = 1;
3766 UNIQUE_SECTION (decl, 0);
3767 named_section (decl, NULL);
3769 else
3770 #else
3771 text_section ();
3772 #endif
3774 /* This used to call ASM_DECLARE_FUNCTION_NAME(), but since it's an
3775 internal (non-global) label that's being emitted, it didn't make
3776 sense to have .type information for local labels. This caused
3777 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3778 me debug info for a label that you're declaring non-global?), so
3779 it was changed to call ASM_OUTPUT_LABEL() instead. */
3781 ASM_OUTPUT_LABEL (file, pic_label_name);
3783 xops[0] = pic_offset_table_rtx;
3784 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3785 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3786 output_asm_insn ("ret", xops);
3789 void
3790 load_pic_register ()
3792 rtx gotsym, pclab;
3794 if (TARGET_64BIT)
3795 abort ();
3797 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3799 if (TARGET_DEEP_BRANCH_PREDICTION)
3801 if (! pic_label_name[0])
3802 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
3803 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
3805 else
3807 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
3810 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
3812 if (! TARGET_DEEP_BRANCH_PREDICTION)
3813 emit_insn (gen_popsi1 (pic_offset_table_rtx));
3815 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
3818 /* Generate a "push" pattern for input ARG. */
3820 static rtx
3821 gen_push (arg)
3822 rtx arg;
3824 return gen_rtx_SET (VOIDmode,
3825 gen_rtx_MEM (Pmode,
3826 gen_rtx_PRE_DEC (Pmode,
3827 stack_pointer_rtx)),
3828 arg);
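/* For example, with ARG == (reg:SI ax) on a 32-bit target the pattern
   built above is

	(set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI ax))

   which is the RTL form of "pushl %eax".  */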
3831 /* Return 1 if we need to save REGNO. */
3832 static int
3833 ix86_save_reg (regno, maybe_eh_return)
3834 int regno;
3835 int maybe_eh_return;
3837 if (flag_pic
3838 && ! TARGET_64BIT
3839 && regno == PIC_OFFSET_TABLE_REGNUM
3840 && (current_function_uses_pic_offset_table
3841 || current_function_uses_const_pool
3842 || current_function_calls_eh_return))
3843 return 1;
3845 if (current_function_calls_eh_return && maybe_eh_return)
3847 unsigned i;
3848 for (i = 0; ; i++)
3850 unsigned test = EH_RETURN_DATA_REGNO(i);
3851 if (test == INVALID_REGNUM)
3852 break;
3853 if (test == (unsigned) regno)
3854 return 1;
3858 return (regs_ever_live[regno]
3859 && !call_used_regs[regno]
3860 && !fixed_regs[regno]
3861 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3864 /* Return number of registers to be saved on the stack. */
3866 static int
3867 ix86_nsaved_regs ()
3869 int nregs = 0;
3870 int regno;
3872 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3873 if (ix86_save_reg (regno, true))
3874 nregs++;
3875 return nregs;
3878 /* Return the offset between two registers, one to be eliminated, and the other
3879 its replacement, at the start of a routine. */
3881 HOST_WIDE_INT
3882 ix86_initial_elimination_offset (from, to)
3883 int from;
3884 int to;
3886 struct ix86_frame frame;
3887 ix86_compute_frame_layout (&frame);
3889 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3890 return frame.hard_frame_pointer_offset;
3891 else if (from == FRAME_POINTER_REGNUM
3892 && to == HARD_FRAME_POINTER_REGNUM)
3893 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3894 else
3896 if (to != STACK_POINTER_REGNUM)
3897 abort ();
3898 else if (from == ARG_POINTER_REGNUM)
3899 return frame.stack_pointer_offset;
3900 else if (from != FRAME_POINTER_REGNUM)
3901 abort ();
3902 else
3903 return frame.stack_pointer_offset - frame.frame_pointer_offset;
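/* For example, given a hypothetical layout with
   hard_frame_pointer_offset == 8, frame_pointer_offset == 24 and
   stack_pointer_offset == 56, eliminating the arg pointer to the hard
   frame pointer yields 8, the frame pointer to the hard frame pointer
   yields -16, and the arg pointer to the stack pointer yields 56.  */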
3907 /* Fill the ix86_frame structure describing the current function's frame. */
3909 static void
3910 ix86_compute_frame_layout (frame)
3911 struct ix86_frame *frame;
3913 HOST_WIDE_INT total_size;
3914 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3915 int offset;
3916 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
3917 HOST_WIDE_INT size = get_frame_size ();
3919 frame->nregs = ix86_nsaved_regs ();
3920 total_size = size;
3922 /* Skip return value and save base pointer. */
3923 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
3925 frame->hard_frame_pointer_offset = offset;
3927 /* Do some sanity checking of stack_alignment_needed and
3928 preferred_alignment, since the i386 port is the only one using these
3929 features, and they may break easily. */
3931 if (size && !stack_alignment_needed)
3932 abort ();
3933 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
3934 abort ();
3935 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3936 abort ();
3937 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3938 abort ();
3940 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
3941 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
3943 /* Register save area */
3944 offset += frame->nregs * UNITS_PER_WORD;
3946 /* Va-arg area */
3947 if (ix86_save_varrargs_registers)
3949 offset += X86_64_VARARGS_SIZE;
3950 frame->va_arg_size = X86_64_VARARGS_SIZE;
3952 else
3953 frame->va_arg_size = 0;
3955 /* Align start of frame for local function. */
3956 frame->padding1 = ((offset + stack_alignment_needed - 1)
3957 & -stack_alignment_needed) - offset;
3959 offset += frame->padding1;
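/* For example, with offset == 20 and stack_alignment_needed == 16 the
   rounding above gives ((20 + 15) & -16) - 20 == 32 - 20, so
   padding1 == 12.  */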
3961 /* Frame pointer points here. */
3962 frame->frame_pointer_offset = offset;
3964 offset += size;
3966 /* Add outgoing arguments area. */
3967 if (ACCUMULATE_OUTGOING_ARGS)
3969 offset += current_function_outgoing_args_size;
3970 frame->outgoing_arguments_size = current_function_outgoing_args_size;
3972 else
3973 frame->outgoing_arguments_size = 0;
3975 /* Align stack boundary. */
3976 frame->padding2 = ((offset + preferred_alignment - 1)
3977 & -preferred_alignment) - offset;
3979 offset += frame->padding2;
3981 /* We've reached end of stack frame. */
3982 frame->stack_pointer_offset = offset;
3984 /* The size the prologue needs to allocate. */
3985 frame->to_allocate =
3986 (size + frame->padding1 + frame->padding2
3987 + frame->outgoing_arguments_size + frame->va_arg_size);
3989 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
3990 && current_function_is_leaf)
3992 frame->red_zone_size = frame->to_allocate;
3993 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
3994 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
3996 else
3997 frame->red_zone_size = 0;
3998 frame->to_allocate -= frame->red_zone_size;
3999 frame->stack_pointer_offset -= frame->red_zone_size;
4000 #if 0
4001 fprintf (stderr, "nregs: %i\n", frame->nregs);
4002 fprintf (stderr, "size: %i\n", size);
4003 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4004 fprintf (stderr, "padding1: %i\n", frame->padding1);
4005 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4006 fprintf (stderr, "padding2: %i\n", frame->padding2);
4007 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4008 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4009 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4010 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4011 frame->hard_frame_pointer_offset);
4012 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4013 #endif
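/* Roughly, the layout computed above, from higher to lower addresses:

	return address
	saved frame pointer	(if frame_pointer_needed)
	  <- hard_frame_pointer_offset
	register save area	(nregs words)
	va-arg save area	(if ix86_save_varrargs_registers)
	padding1
	  <- frame_pointer_offset
	local variables		(get_frame_size () bytes)
	outgoing arguments	(if ACCUMULATE_OUTGOING_ARGS)
	padding2
	  <- stack_pointer_offset  */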
4016 /* Emit code to save registers in the prologue. */
4018 static void
4019 ix86_emit_save_regs ()
4021 register int regno;
4022 rtx insn;
4024 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4025 if (ix86_save_reg (regno, true))
4027 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4028 RTX_FRAME_RELATED_P (insn) = 1;
4032 /* Emit code to save registers using MOV insns. The first register
4033 is stored at POINTER + OFFSET. */
4034 static void
4035 ix86_emit_save_regs_using_mov (pointer, offset)
4036 rtx pointer;
4037 HOST_WIDE_INT offset;
4039 int regno;
4040 rtx insn;
4042 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4043 if (ix86_save_reg (regno, true))
4045 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4046 Pmode, offset),
4047 gen_rtx_REG (Pmode, regno));
4048 RTX_FRAME_RELATED_P (insn) = 1;
4049 offset += UNITS_PER_WORD;
4053 /* Expand the prologue into a bunch of separate insns. */
4055 void
4056 ix86_expand_prologue ()
4058 rtx insn;
4059 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
4060 || current_function_uses_const_pool)
4061 && !TARGET_64BIT);
4062 struct ix86_frame frame;
4063 int use_mov = 0;
4064 HOST_WIDE_INT allocate;
4066 if (!optimize_size)
4068 use_fast_prologue_epilogue
4069 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4070 if (TARGET_PROLOGUE_USING_MOVE)
4071 use_mov = use_fast_prologue_epilogue;
4073 ix86_compute_frame_layout (&frame);
4075 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4076 slower on all targets. Also sdb doesn't like it. */
4078 if (frame_pointer_needed)
4080 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4081 RTX_FRAME_RELATED_P (insn) = 1;
4083 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4084 RTX_FRAME_RELATED_P (insn) = 1;
4087 allocate = frame.to_allocate;
4088 /* When we are dealing with only a single register and an empty frame,
4089 a push is equivalent to the mov+add sequence. */
4090 if (allocate == 0 && frame.nregs <= 1)
4091 use_mov = 0;
4093 if (!use_mov)
4094 ix86_emit_save_regs ();
4095 else
4096 allocate += frame.nregs * UNITS_PER_WORD;
4098 if (allocate == 0)
4100 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4102 insn = emit_insn (gen_pro_epilogue_adjust_stack
4103 (stack_pointer_rtx, stack_pointer_rtx,
4104 GEN_INT (-allocate)));
4105 RTX_FRAME_RELATED_P (insn) = 1;
4107 else
4109 /* ??? Is this only valid for Win32? */
4111 rtx arg0, sym;
4113 if (TARGET_64BIT)
4114 abort ();
4116 arg0 = gen_rtx_REG (SImode, 0);
4117 emit_move_insn (arg0, GEN_INT (allocate));
4119 sym = gen_rtx_MEM (FUNCTION_MODE,
4120 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4121 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4123 CALL_INSN_FUNCTION_USAGE (insn)
4124 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4125 CALL_INSN_FUNCTION_USAGE (insn));
4127 if (use_mov)
4129 if (!frame_pointer_needed || !frame.to_allocate)
4130 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4131 else
4132 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4133 -frame.nregs * UNITS_PER_WORD);
4136 #ifdef SUBTARGET_PROLOGUE
4137 SUBTARGET_PROLOGUE;
4138 #endif
4140 if (pic_reg_used)
4141 load_pic_register ();
4143 /* If we are profiling, make sure no instructions are scheduled before
4144 the call to mcount. However, if -fpic, the above call will have
4145 done that. */
4146 if (profile_flag && ! pic_reg_used)
4147 emit_insn (gen_blockage ());
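/* For a simple 32-bit function with a frame pointer and one call-saved
   register, and use_mov clear, the insns emitted above correspond
   roughly to:

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	subl	$N, %esp	(N == frame.to_allocate)  */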
4150 /* Emit code to restore saved registers using MOV insns. First register
4151 is restored from POINTER + OFFSET. */
4152 static void
4153 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4154 rtx pointer;
4155 int offset;
4156 int maybe_eh_return;
4158 int regno;
4160 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4161 if (ix86_save_reg (regno, maybe_eh_return))
4163 emit_move_insn (gen_rtx_REG (Pmode, regno),
4164 adjust_address (gen_rtx_MEM (Pmode, pointer),
4165 Pmode, offset));
4166 offset += UNITS_PER_WORD;
4170 /* Restore function stack, frame, and registers. */
4172 void
4173 ix86_expand_epilogue (style)
4174 int style;
4176 int regno;
4177 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4178 struct ix86_frame frame;
4179 HOST_WIDE_INT offset;
4181 ix86_compute_frame_layout (&frame);
4183 /* Calculate start of saved registers relative to ebp. Special care
4184 must be taken for the normal return case of a function using
4185 eh_return: the eax and edx registers are marked as saved, but not
4186 restored along this path. */
4187 offset = frame.nregs;
4188 if (current_function_calls_eh_return && style != 2)
4189 offset -= 2;
4190 offset *= -UNITS_PER_WORD;
4192 /* If we're only restoring one register and sp is not valid, then
4193 use a move instruction to restore the register, since it's
4194 less work than reloading sp and popping the register.
4196 The default code results in a stack adjustment using an add/lea
4197 instruction, while this code results in a LEAVE instruction (or its
4198 discrete equivalent), so it is profitable in some other cases as well,
4199 especially when there are no registers to restore. We also use this
4200 code when TARGET_USE_LEAVE and there is exactly one register to pop.
4201 This heuristic may need some tuning in the future. */
4202 if ((!sp_valid && frame.nregs <= 1)
4203 || (TARGET_EPILOGUE_USING_MOVE
4204 && use_fast_prologue_epilogue
4205 && (frame.nregs > 1 || frame.to_allocate))
4206 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4207 || (frame_pointer_needed && TARGET_USE_LEAVE
4208 && use_fast_prologue_epilogue && frame.nregs == 1)
4209 || current_function_calls_eh_return)
4211 /* Restore registers. We can use ebp or esp to address the memory
4212 locations. If both are available, default to ebp, since offsets
4213 are known to be small. The only exception is when esp points directly
4214 to the end of the block of saved registers, where we may simplify the
4215 addressing mode. */
4217 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4218 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4219 frame.to_allocate, style == 2);
4220 else
4221 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4222 offset, style == 2);
4224 /* eh_return epilogues need %ecx added to the stack pointer. */
4225 if (style == 2)
4227 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4229 if (frame_pointer_needed)
4231 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4232 tmp = plus_constant (tmp, UNITS_PER_WORD);
4233 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4235 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4236 emit_move_insn (hard_frame_pointer_rtx, tmp);
4238 emit_insn (gen_pro_epilogue_adjust_stack
4239 (stack_pointer_rtx, sa, const0_rtx));
4241 else
4243 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4244 tmp = plus_constant (tmp, (frame.to_allocate
4245 + frame.nregs * UNITS_PER_WORD));
4246 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4249 else if (!frame_pointer_needed)
4250 emit_insn (gen_pro_epilogue_adjust_stack
4251 (stack_pointer_rtx, stack_pointer_rtx,
4252 GEN_INT (frame.to_allocate
4253 + frame.nregs * UNITS_PER_WORD)));
4254 /* If not an i386, mov & pop is faster than "leave". */
4255 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4256 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4257 else
4259 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4260 hard_frame_pointer_rtx,
4261 const0_rtx));
4262 if (TARGET_64BIT)
4263 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4264 else
4265 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4268 else
4270 /* First step is to deallocate the stack frame so that we can
4271 pop the registers. */
4272 if (!sp_valid)
4274 if (!frame_pointer_needed)
4275 abort ();
4276 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4277 hard_frame_pointer_rtx,
4278 GEN_INT (offset)));
4280 else if (frame.to_allocate)
4281 emit_insn (gen_pro_epilogue_adjust_stack
4282 (stack_pointer_rtx, stack_pointer_rtx,
4283 GEN_INT (frame.to_allocate)));
4285 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4286 if (ix86_save_reg (regno, false))
4288 if (TARGET_64BIT)
4289 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4290 else
4291 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4293 if (frame_pointer_needed)
4295 /* Leave results in shorter dependency chains on CPUs that are
4296 able to grok it fast. */
4297 if (TARGET_USE_LEAVE)
4298 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4299 else if (TARGET_64BIT)
4300 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4301 else
4302 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4306 /* Sibcall epilogues don't want a return instruction. */
4307 if (style == 0)
4308 return;
4310 if (current_function_pops_args && current_function_args_size)
4312 rtx popc = GEN_INT (current_function_pops_args);
4314 /* i386 can only pop 64K bytes. If asked to pop more, pop
4315 return address, do explicit add, and jump indirectly to the
4316 caller. */
4318 if (current_function_pops_args >= 65536)
4320 rtx ecx = gen_rtx_REG (SImode, 2);
4322 /* There is no "pascal" calling convention in the 64-bit ABI. */
4323 if (TARGET_64BIT)
4324 abort ();
4326 emit_insn (gen_popsi1 (ecx));
4327 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4328 emit_jump_insn (gen_return_indirect_internal (ecx));
4330 else
4331 emit_jump_insn (gen_return_pop_internal (popc));
4333 else
4334 emit_jump_insn (gen_return_internal ());
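/* In the common frame-pointer case the code above therefore reduces to

	leave			(when TARGET_USE_LEAVE)
   or
	movl	%ebp, %esp
	popl	%ebp

   followed by "ret", or "ret $n" for callee-pop calling conventions.  */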
4337 /* Extract the parts of an RTL expression that is a valid memory address
4338 for an instruction. Return false if the structure of the address is
4339 grossly off. */
4341 static int
4342 ix86_decompose_address (addr, out)
4343 register rtx addr;
4344 struct ix86_address *out;
4346 rtx base = NULL_RTX;
4347 rtx index = NULL_RTX;
4348 rtx disp = NULL_RTX;
4349 HOST_WIDE_INT scale = 1;
4350 rtx scale_rtx = NULL_RTX;
4352 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4353 base = addr;
4354 else if (GET_CODE (addr) == PLUS)
4356 rtx op0 = XEXP (addr, 0);
4357 rtx op1 = XEXP (addr, 1);
4358 enum rtx_code code0 = GET_CODE (op0);
4359 enum rtx_code code1 = GET_CODE (op1);
4361 if (code0 == REG || code0 == SUBREG)
4363 if (code1 == REG || code1 == SUBREG)
4364 index = op0, base = op1; /* index + base */
4365 else
4366 base = op0, disp = op1; /* base + displacement */
4368 else if (code0 == MULT)
4370 index = XEXP (op0, 0);
4371 scale_rtx = XEXP (op0, 1);
4372 if (code1 == REG || code1 == SUBREG)
4373 base = op1; /* index*scale + base */
4374 else
4375 disp = op1; /* index*scale + disp */
4377 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4379 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4380 scale_rtx = XEXP (XEXP (op0, 0), 1);
4381 base = XEXP (op0, 1);
4382 disp = op1;
4384 else if (code0 == PLUS)
4386 index = XEXP (op0, 0); /* index + base + disp */
4387 base = XEXP (op0, 1);
4388 disp = op1;
4390 else
4391 return FALSE;
4393 else if (GET_CODE (addr) == MULT)
4395 index = XEXP (addr, 0); /* index*scale */
4396 scale_rtx = XEXP (addr, 1);
4398 else if (GET_CODE (addr) == ASHIFT)
4400 rtx tmp;
4402 /* We're called for lea too, which implements ashift on occasion. */
4403 index = XEXP (addr, 0);
4404 tmp = XEXP (addr, 1);
4405 if (GET_CODE (tmp) != CONST_INT)
4406 return FALSE;
4407 scale = INTVAL (tmp);
4408 if ((unsigned HOST_WIDE_INT) scale > 3)
4409 return FALSE;
4410 scale = 1 << scale;
4412 else
4413 disp = addr; /* displacement */
4415 /* Extract the integral value of scale. */
4416 if (scale_rtx)
4418 if (GET_CODE (scale_rtx) != CONST_INT)
4419 return FALSE;
4420 scale = INTVAL (scale_rtx);
4423 /* Allow arg pointer and stack pointer as index if there is no scaling. */
4424 if (base && index && scale == 1
4425 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4426 || index == stack_pointer_rtx))
4428 rtx tmp = base;
4429 base = index;
4430 index = tmp;
4433 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4434 if ((base == hard_frame_pointer_rtx
4435 || base == frame_pointer_rtx
4436 || base == arg_pointer_rtx) && !disp)
4437 disp = const0_rtx;
4439 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4440 Avoid this by transforming to [%esi+0]. */
4441 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4442 && base && !index && !disp
4443 && REG_P (base)
4444 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4445 disp = const0_rtx;
4447 /* Special case: encode reg+reg instead of reg*2. */
4448 if (!base && index && scale && scale == 2)
4449 base = index, scale = 1;
4451 /* Special case: scaling cannot be encoded without base or displacement. */
4452 if (!base && !disp && index && scale != 1)
4453 disp = const0_rtx;
4455 out->base = base;
4456 out->index = index;
4457 out->disp = disp;
4458 out->scale = scale;
4460 return TRUE;
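/* For example (32-bit Pmode), the decompositions above give:

	(reg:SI bx)				base=%ebx
	(plus:SI (reg:SI bx) (const_int 16))	base=%ebx disp=16
	(plus:SI (mult:SI (reg:SI si)
			  (const_int 4))
		 (reg:SI bx))			base=%ebx index=%esi scale=4
	(ashift:SI (reg:SI si) (const_int 3))	index=%esi scale=8 disp=0
	(mult:SI (reg:SI si) (const_int 2))	base=%esi index=%esi scale=1  */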
4463 /* Return the cost of the memory address x.
4464 For i386, it is better to use a complex address than let gcc copy
4465 the address into a reg and make a new pseudo. But not if the address
4466 requires two regs - that would mean more pseudos with longer
4467 lifetimes. */
4468 int
4469 ix86_address_cost (x)
4470 rtx x;
4472 struct ix86_address parts;
4473 int cost = 1;
4475 if (!ix86_decompose_address (x, &parts))
4476 abort ();
4478 /* More complex memory references are better. */
4479 if (parts.disp && parts.disp != const0_rtx)
4480 cost--;
4482 /* Attempt to minimize number of registers in the address. */
4483 if ((parts.base
4484 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4485 || (parts.index
4486 && (!REG_P (parts.index)
4487 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4488 cost++;
4490 if (parts.base
4491 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4492 && parts.index
4493 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4494 && parts.base != parts.index)
4495 cost++;
4497 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
4498 since its predecode logic can't detect the length of such instructions
4499 and they degenerate to vector decoding. Increase the cost of such
4500 addresses here. The penalty is at least 2 cycles. It may be worthwhile
4501 to split such addresses or even refuse them entirely.
4503 The following addressing modes are affected:
4504 [base+scale*index]
4505 [scale*index+disp]
4506 [base+index]
4508 The first and last cases may be avoidable by explicitly coding a zero
4509 displacement into the memory address, but I don't have an AMD-K6 machine
4510 handy to check this theory. */
4512 if (TARGET_K6
4513 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4514 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4515 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4516 cost += 10;
4518 return cost;
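/* For example, on a non-K6 target the address (%ebx) costs 1, while
   4(%ebx,%esi,2) costs 0: the nonzero displacement is rewarded and both
   registers are hard registers, so no register penalty applies.  On the
   K6, (%ebx,%esi) instead costs 11 because of the +10 penalty above.  */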
4521 /* If X is a machine specific address (i.e. a symbol or label being
4522 referenced as a displacement from the GOT implemented using an
4523 UNSPEC), then return the base term. Otherwise return X. */
4525 rtx
4526 ix86_find_base_term (x)
4527 rtx x;
4529 rtx term;
4531 if (TARGET_64BIT)
4533 if (GET_CODE (x) != CONST)
4534 return x;
4535 term = XEXP (x, 0);
4536 if (GET_CODE (term) == PLUS
4537 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4538 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4539 term = XEXP (term, 0);
4540 if (GET_CODE (term) != UNSPEC
4541 || XVECLEN (term, 0) != 1
4542 || XINT (term, 1) != 15)
4543 return x;
4545 term = XVECEXP (term, 0, 0);
4547 if (GET_CODE (term) != SYMBOL_REF
4548 && GET_CODE (term) != LABEL_REF)
4549 return x;
4551 return term;
4554 if (GET_CODE (x) != PLUS
4555 || XEXP (x, 0) != pic_offset_table_rtx
4556 || GET_CODE (XEXP (x, 1)) != CONST)
4557 return x;
4559 term = XEXP (XEXP (x, 1), 0);
4561 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4562 term = XEXP (term, 0);
4564 if (GET_CODE (term) != UNSPEC
4565 || XVECLEN (term, 0) != 1
4566 || XINT (term, 1) != 7)
4567 return x;
4569 term = XVECEXP (term, 0, 0);
4571 if (GET_CODE (term) != SYMBOL_REF
4572 && GET_CODE (term) != LABEL_REF)
4573 return x;
4575 return term;
4578 /* Determine if a given CONST RTX is a valid memory displacement
4579 in PIC mode. */
4581 int
4582 legitimate_pic_address_disp_p (disp)
4583 register rtx disp;
4585 /* In 64bit mode we can allow direct addresses of symbols and labels
4586 when they are not dynamic symbols. */
4587 if (TARGET_64BIT)
4589 rtx x = disp;
4590 if (GET_CODE (disp) == CONST)
4591 x = XEXP (disp, 0);
4592 /* ??? Handle PIC code models */
4593 if (GET_CODE (x) == PLUS
4594 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4595 && ix86_cmodel == CM_SMALL_PIC
4596 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4597 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4598 x = XEXP (x, 0);
4599 if (local_symbolic_operand (x, Pmode))
4600 return 1;
4602 if (GET_CODE (disp) != CONST)
4603 return 0;
4604 disp = XEXP (disp, 0);
4606 if (TARGET_64BIT)
4608 /* It is unsafe to allow PLUS expressions, given the limited allowed
4609 distance of GOT table references. We should not need these anyway. */
4610 if (GET_CODE (disp) != UNSPEC
4611 || XVECLEN (disp, 0) != 1
4612 || XINT (disp, 1) != 15)
4613 return 0;
4615 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4616 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4617 return 0;
4618 return 1;
4621 if (GET_CODE (disp) == PLUS)
4623 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4624 return 0;
4625 disp = XEXP (disp, 0);
4628 if (GET_CODE (disp) != UNSPEC
4629 || XVECLEN (disp, 0) != 1)
4630 return 0;
4632 /* Must be @GOT or @GOTOFF. */
4633 switch (XINT (disp, 1))
4635 case 6: /* @GOT */
4636 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4638 case 7: /* @GOTOFF */
4639 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4642 return 0;
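/* For example, in 32-bit PIC code the displacements

	(const (unspec [(symbol_ref "f")] 6))			f@GOT
	(const (plus (unspec [(symbol_ref "x")] 7)
		     (const_int 4)))				x@GOTOFF+4

   are accepted, the latter only when "x" is a local symbol.  */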
4645 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4646 memory address for an instruction. The MODE argument is the machine mode
4647 for the MEM expression that wants to use this address.
4649 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
4650 convert common non-canonical forms to canonical form so that they will
4651 be recognized. */
4653 int
4654 legitimate_address_p (mode, addr, strict)
4655 enum machine_mode mode;
4656 register rtx addr;
4657 int strict;
4659 struct ix86_address parts;
4660 rtx base, index, disp;
4661 HOST_WIDE_INT scale;
4662 const char *reason = NULL;
4663 rtx reason_rtx = NULL_RTX;
4665 if (TARGET_DEBUG_ADDR)
4667 fprintf (stderr,
4668 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4669 GET_MODE_NAME (mode), strict);
4670 debug_rtx (addr);
4673 if (! ix86_decompose_address (addr, &parts))
4675 reason = "decomposition failed";
4676 goto report_error;
4679 base = parts.base;
4680 index = parts.index;
4681 disp = parts.disp;
4682 scale = parts.scale;
4684 /* Validate base register.
4686 Don't allow SUBREGs here; they can lead to spill failures when the base
4687 is one word out of a two-word structure, which is represented internally
4688 as a DImode int. */
4690 if (base)
4692 reason_rtx = base;
4694 if (GET_CODE (base) != REG)
4696 reason = "base is not a register";
4697 goto report_error;
4700 if (GET_MODE (base) != Pmode)
4702 reason = "base is not in Pmode";
4703 goto report_error;
4706 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
4707 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
4709 reason = "base is not valid";
4710 goto report_error;
4714 /* Validate index register.
4716 Don't allow SUBREGs here; they can lead to spill failures when the index
4717 is one word out of a two-word structure, which is represented internally
4718 as a DImode int. */
4720 if (index)
4722 reason_rtx = index;
4724 if (GET_CODE (index) != REG)
4726 reason = "index is not a register";
4727 goto report_error;
4730 if (GET_MODE (index) != Pmode)
4732 reason = "index is not in Pmode";
4733 goto report_error;
4736 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
4737 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
4739 reason = "index is not valid";
4740 goto report_error;
4744 /* Validate scale factor. */
4745 if (scale != 1)
4747 reason_rtx = GEN_INT (scale);
4748 if (!index)
4750 reason = "scale without index";
4751 goto report_error;
4754 if (scale != 2 && scale != 4 && scale != 8)
4756 reason = "scale is not a valid multiplier";
4757 goto report_error;
4761 /* Validate displacement. */
4762 if (disp)
4764 reason_rtx = disp;
4766 if (!CONSTANT_ADDRESS_P (disp))
4768 reason = "displacement is not constant";
4769 goto report_error;
4772 if (TARGET_64BIT)
4774 if (!x86_64_sign_extended_value (disp))
4776 reason = "displacement is out of range";
4777 goto report_error;
4780 else
4782 if (GET_CODE (disp) == CONST_DOUBLE)
4784 reason = "displacement is a const_double";
4785 goto report_error;
4789 if (flag_pic && SYMBOLIC_CONST (disp))
4791 if (TARGET_64BIT && (index || base))
4793 reason = "non-constant pic memory reference";
4794 goto report_error;
4796 if (! legitimate_pic_address_disp_p (disp))
4798 reason = "displacement is an invalid pic construct";
4799 goto report_error;
4802 /* This code used to verify that a symbolic pic displacement
4803 includes the pic_offset_table_rtx register.
4805 While this is a good idea, unfortunately these constructs may
4806 be created by the "adds using lea" optimization for incorrect
4807 code like:
4809 int a;
4810 int foo(int i)
4811 {
4812 return *(&a+i);
4813 }
4815 This code is nonsensical, but results in addressing the
4816 GOT table with a pic_offset_table_rtx base. We can't
4817 just refuse it easily, since it gets matched by the
4818 "addsi3" pattern, which later gets split to lea in case
4819 the output register differs from the input. While this
4820 could be handled by a separate addsi pattern for this case
4821 that never results in lea, disabling this test seems to be
4822 the easier and correct fix for the crash. */
4824 else if (HALF_PIC_P ())
4826 if (! HALF_PIC_ADDRESS_P (disp)
4827 || (base != NULL_RTX || index != NULL_RTX))
4829 reason = "displacement is an invalid half-pic reference";
4830 goto report_error;
4835 /* Everything looks valid. */
4836 if (TARGET_DEBUG_ADDR)
4837 fprintf (stderr, "Success.\n");
4838 return TRUE;
4840 report_error:
4841 if (TARGET_DEBUG_ADDR)
4843 fprintf (stderr, "Error: %s\n", reason);
4844 debug_rtx (reason_rtx);
4846 return FALSE;
4849 /* Return a unique alias set for the GOT. */
4851 static HOST_WIDE_INT
4852 ix86_GOT_alias_set ()
4854 static HOST_WIDE_INT set = -1;
4855 if (set == -1)
4856 set = new_alias_set ();
4857 return set;
4860 /* Return a legitimate reference for ORIG (an address) using the
4861 register REG. If REG is 0, a new pseudo is generated.
4863 There are two types of references that must be handled:
4865 1. Global data references must load the address from the GOT, via
4866 the PIC reg. An insn is emitted to do this load, and the reg is
4867 returned.
4869 2. Static data references, constant pool addresses, and code labels
4870 compute the address as an offset from the GOT, whose base is in
4871 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
4872 differentiate them from global data objects. The returned
4873 address is the PIC reg + an unspec constant.
4875 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
4876 reg also appears in the address. */
4878 rtx
4879 legitimize_pic_address (orig, reg)
4880 rtx orig;
4881 rtx reg;
4883 rtx addr = orig;
4884 rtx new = orig;
4885 rtx base;
4887 if (local_symbolic_operand (addr, Pmode))
4889 /* In 64bit mode we can address such objects directly. */
4890 if (TARGET_64BIT)
4891 new = addr;
4892 else
4894 /* This symbol may be referenced via a displacement from the PIC
4895 base address (@GOTOFF). */
4897 current_function_uses_pic_offset_table = 1;
4898 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
4899 new = gen_rtx_CONST (Pmode, new);
4900 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4902 if (reg != 0)
4904 emit_move_insn (reg, new);
4905 new = reg;
4909 else if (GET_CODE (addr) == SYMBOL_REF)
4911 if (TARGET_64BIT)
4913 current_function_uses_pic_offset_table = 1;
4914 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15);
4915 new = gen_rtx_CONST (Pmode, new);
4916 new = gen_rtx_MEM (Pmode, new);
4917 RTX_UNCHANGING_P (new) = 1;
4918 set_mem_alias_set (new, ix86_GOT_alias_set ());
4920 if (reg == 0)
4921 reg = gen_reg_rtx (Pmode);
4922 /* Use gen_movsi directly; otherwise the address is loaded
4923 into a register for CSE. We don't want to CSE these addresses;
4924 instead we CSE addresses from the GOT table, so skip this. */
4925 emit_insn (gen_movsi (reg, new));
4926 new = reg;
4928 else
4930 /* This symbol must be referenced via a load from the
4931 Global Offset Table (@GOT). */
4933 current_function_uses_pic_offset_table = 1;
4934 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
4935 new = gen_rtx_CONST (Pmode, new);
4936 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4937 new = gen_rtx_MEM (Pmode, new);
4938 RTX_UNCHANGING_P (new) = 1;
4939 set_mem_alias_set (new, ix86_GOT_alias_set ());
4941 if (reg == 0)
4942 reg = gen_reg_rtx (Pmode);
4943 emit_move_insn (reg, new);
4944 new = reg;
4947 else
4949 if (GET_CODE (addr) == CONST)
4951 addr = XEXP (addr, 0);
4952 if (GET_CODE (addr) == UNSPEC)
4954 /* Check that the unspec is one of the ones we generate? */
4956 else if (GET_CODE (addr) != PLUS)
4957 abort ();
4959 if (GET_CODE (addr) == PLUS)
4961 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
4963 /* Check first to see if this is a constant offset from a @GOTOFF
4964 symbol reference. */
4965 if (local_symbolic_operand (op0, Pmode)
4966 && GET_CODE (op1) == CONST_INT)
4968 if (!TARGET_64BIT)
4970 current_function_uses_pic_offset_table = 1;
4971 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
4972 new = gen_rtx_PLUS (Pmode, new, op1);
4973 new = gen_rtx_CONST (Pmode, new);
4974 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4976 if (reg != 0)
4978 emit_move_insn (reg, new);
4979 new = reg;
4982 else
4984 /* ??? We need to limit offsets here. */
4987 else
4989 base = legitimize_pic_address (XEXP (addr, 0), reg);
4990 new = legitimize_pic_address (XEXP (addr, 1),
4991 base == reg ? NULL_RTX : reg);
4993 if (GET_CODE (new) == CONST_INT)
4994 new = plus_constant (base, INTVAL (new));
4995 else
4997 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
4999 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5000 new = XEXP (new, 1);
5002 new = gen_rtx_PLUS (Pmode, base, new);
5007 return new;
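/* For example, in 32-bit PIC code a global symbol becomes a load
   through the GOT,

	(symbol_ref "foo")
	  -> (mem:SI (plus:SI (reg:SI ebx)
			      (const:SI (unspec [(symbol_ref "foo")] 6))))

   roughly "movl foo@GOT(%ebx), %reg", while a local symbol becomes the
   cheaper @GOTOFF form added directly to the PIC register.  */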
5010 /* Try machine-dependent ways of modifying an illegitimate address
5011 to be legitimate. If we find one, return the new, valid address.
5012 This macro is used in only one place: `memory_address' in explow.c.
5014 OLDX is the address as it was before break_out_memory_refs was called.
5015 In some cases it is useful to look at this to decide what needs to be done.
5017 MODE and WIN are passed so that this macro can use
5018 GO_IF_LEGITIMATE_ADDRESS.
5020 It is always safe for this macro to do nothing. It exists to recognize
5021 opportunities to optimize the output.
5023 For the 80386, we handle X+REG by loading X into a register R and
5024 using R+REG. R will go in a general reg and indexing will be used.
5025 However, if REG is a broken-out memory address or multiplication,
5026 nothing needs to be done because REG can certainly go in a general reg.
5028 When -fpic is used, special handling is needed for symbolic references.
5029 See comments by legitimize_pic_address in i386.c for details. */
5031 rtx
5032 legitimize_address (x, oldx, mode)
5033 register rtx x;
5034 register rtx oldx ATTRIBUTE_UNUSED;
5035 enum machine_mode mode;
5037 int changed = 0;
5038 unsigned log;
5040 if (TARGET_DEBUG_ADDR)
5042 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5043 GET_MODE_NAME (mode));
5044 debug_rtx (x);
5047 if (flag_pic && SYMBOLIC_CONST (x))
5048 return legitimize_pic_address (x, 0);
5050 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5051 if (GET_CODE (x) == ASHIFT
5052 && GET_CODE (XEXP (x, 1)) == CONST_INT
5053 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5055 changed = 1;
5056 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5057 GEN_INT (1 << log));
5060 if (GET_CODE (x) == PLUS)
5062 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5064 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5065 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5066 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5068 changed = 1;
5069 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5070 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5071 GEN_INT (1 << log));
5074 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5075 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5076 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5078 changed = 1;
5079 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5080 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5081 GEN_INT (1 << log));
5084 /* Put multiply first if it isn't already. */
5085 if (GET_CODE (XEXP (x, 1)) == MULT)
5087 rtx tmp = XEXP (x, 0);
5088 XEXP (x, 0) = XEXP (x, 1);
5089 XEXP (x, 1) = tmp;
5090 changed = 1;
5093 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5094 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5095 created by virtual register instantiation, register elimination, and
5096 similar optimizations. */
5097 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5099 changed = 1;
5100 x = gen_rtx_PLUS (Pmode,
5101 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5102 XEXP (XEXP (x, 1), 0)),
5103 XEXP (XEXP (x, 1), 1));
5106 /* Canonicalize
5107 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5108 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5109 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5110 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5111 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5112 && CONSTANT_P (XEXP (x, 1)))
5114 rtx constant;
5115 rtx other = NULL_RTX;
5117 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5119 constant = XEXP (x, 1);
5120 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5122 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5124 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5125 other = XEXP (x, 1);
5127 else
5128 constant = 0;
5130 if (constant)
5132 changed = 1;
5133 x = gen_rtx_PLUS (Pmode,
5134 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5135 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5136 plus_constant (other, INTVAL (constant)));
5140 if (changed && legitimate_address_p (mode, x, FALSE))
5141 return x;
5143 if (GET_CODE (XEXP (x, 0)) == MULT)
5145 changed = 1;
5146 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5149 if (GET_CODE (XEXP (x, 1)) == MULT)
5151 changed = 1;
5152 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5155 if (changed
5156 && GET_CODE (XEXP (x, 1)) == REG
5157 && GET_CODE (XEXP (x, 0)) == REG)
5158 return x;
5160 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5162 changed = 1;
5163 x = legitimize_pic_address (x, 0);
5166 if (changed && legitimate_address_p (mode, x, FALSE))
5167 return x;
5169 if (GET_CODE (XEXP (x, 0)) == REG)
5171 register rtx temp = gen_reg_rtx (Pmode);
5172 register rtx val = force_operand (XEXP (x, 1), temp);
5173 if (val != temp)
5174 emit_move_insn (temp, val);
5176 XEXP (x, 1) = temp;
5177 return x;
5180 else if (GET_CODE (XEXP (x, 1)) == REG)
5182 register rtx temp = gen_reg_rtx (Pmode);
5183 register rtx val = force_operand (XEXP (x, 0), temp);
5184 if (val != temp)
5185 emit_move_insn (temp, val);
5187 XEXP (x, 0) = temp;
5188 return x;
5192 return x;
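/* For example, (plus (ashift (reg) (const_int 2)) (reg)) is rewritten
   above into the canonical (plus (mult (reg) (const_int 4)) (reg)),
   which legitimate_address_p then accepts as a scaled-index address.  */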
5195 /* Print an integer constant expression in assembler syntax. Addition
5196 and subtraction are the only arithmetic that may appear in these
5197 expressions. FILE is the stdio stream to write to, X is the rtx, and
5198 CODE is the operand print code from the output string. */
5200 static void
5201 output_pic_addr_const (file, x, code)
5202 FILE *file;
5203 rtx x;
5204 int code;
5206 char buf[256];
5208 switch (GET_CODE (x))
5210 case PC:
5211 if (flag_pic)
5212 putc ('.', file);
5213 else
5214 abort ();
5215 break;
5217 case SYMBOL_REF:
5218 assemble_name (file, XSTR (x, 0));
5219 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5220 fputs ("@PLT", file);
5221 break;
5223 case LABEL_REF:
5224 x = XEXP (x, 0);
5225 /* FALLTHRU */
5226 case CODE_LABEL:
5227 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5228 assemble_name (asm_out_file, buf);
5229 break;
5231 case CONST_INT:
5232 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5233 break;
5235 case CONST:
5236 /* This used to output parentheses around the expression,
5237 but that does not work on the 386 (either ATT or BSD assembler). */
5238 output_pic_addr_const (file, XEXP (x, 0), code);
5239 break;
5241 case CONST_DOUBLE:
5242 if (GET_MODE (x) == VOIDmode)
5244 /* We can use %d if the number is <32 bits and positive. */
5245 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5246 fprintf (file, "0x%lx%08lx",
5247 (unsigned long) CONST_DOUBLE_HIGH (x),
5248 (unsigned long) CONST_DOUBLE_LOW (x));
5249 else
5250 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5252 else
5253 /* We can't handle floating point constants;
5254 PRINT_OPERAND must handle them. */
5255 output_operand_lossage ("floating constant misused");
5256 break;
5258 case PLUS:
5259 /* Some assemblers need integer constants to appear first. */
5260 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5262 output_pic_addr_const (file, XEXP (x, 0), code);
5263 putc ('+', file);
5264 output_pic_addr_const (file, XEXP (x, 1), code);
5266 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5268 output_pic_addr_const (file, XEXP (x, 1), code);
5269 putc ('+', file);
5270 output_pic_addr_const (file, XEXP (x, 0), code);
5272 else
5273 abort ();
5274 break;
5276 case MINUS:
5277 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
5278 output_pic_addr_const (file, XEXP (x, 0), code);
5279 putc ('-', file);
5280 output_pic_addr_const (file, XEXP (x, 1), code);
5281 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
5282 break;
5284 case UNSPEC:
5285 if (XVECLEN (x, 0) != 1)
5286 abort ();
5287 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5288 switch (XINT (x, 1))
5290 case 6:
5291 fputs ("@GOT", file);
5292 break;
5293 case 7:
5294 fputs ("@GOTOFF", file);
5295 break;
5296 case 8:
5297 fputs ("@PLT", file);
5298 break;
5299 case 15:
5300 fputs ("@GOTPCREL(%RIP)", file);
5301 break;
5302 default:
5303 output_operand_lossage ("invalid UNSPEC as operand");
5304 break;
5306 break;
5308 default:
5309 output_operand_lossage ("invalid expression as operand");
5313 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5314 We need to handle our special PIC relocations. */
5316 void
5317 i386_dwarf_output_addr_const (file, x)
5318 FILE *file;
5319 rtx x;
5321 #ifdef ASM_QUAD
5322 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5323 #else
5324 if (TARGET_64BIT)
5325 abort ();
5326 fprintf (file, "%s", ASM_LONG);
5327 #endif
5328 if (flag_pic)
5329 output_pic_addr_const (file, x, '\0');
5330 else
5331 output_addr_const (file, x);
5332 fputc ('\n', file);
5335 /* In the name of slightly smaller debug output, and to cater to
5336 general assembler lossage, recognize PIC+GOTOFF and turn it back
5337 into a direct symbol reference. */
5339 rtx
5340 i386_simplify_dwarf_addr (orig_x)
5341 rtx orig_x;
5343 rtx x = orig_x;
5345 if (TARGET_64BIT)
5347 if (GET_CODE (x) != CONST
5348 || GET_CODE (XEXP (x, 0)) != UNSPEC
5349 || XINT (XEXP (x, 0), 1) != 15)
5350 return orig_x;
5351 return XVECEXP (XEXP (x, 0), 0, 0);
5354 if (GET_CODE (x) != PLUS
5355 || GET_CODE (XEXP (x, 0)) != REG
5356 || GET_CODE (XEXP (x, 1)) != CONST)
5357 return orig_x;
5359 x = XEXP (XEXP (x, 1), 0);
5360 if (GET_CODE (x) == UNSPEC
5361 && (XINT (x, 1) == 6
5362 || XINT (x, 1) == 7))
5363 return XVECEXP (x, 0, 0);
5365 if (GET_CODE (x) == PLUS
5366 && GET_CODE (XEXP (x, 0)) == UNSPEC
5367 && GET_CODE (XEXP (x, 1)) == CONST_INT
5368 && (XINT (XEXP (x, 0), 1) == 6
5369 || XINT (XEXP (x, 0), 1) == 7))
5370 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5372 return orig_x;
5375 static void
5376 put_condition_code (code, mode, reverse, fp, file)
5377 enum rtx_code code;
5378 enum machine_mode mode;
5379 int reverse, fp;
5380 FILE *file;
5382 const char *suffix;
5384 if (mode == CCFPmode || mode == CCFPUmode)
5386 enum rtx_code second_code, bypass_code;
5387 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5388 if (bypass_code != NIL || second_code != NIL)
5389 abort ();
5390 code = ix86_fp_compare_code_to_integer (code);
5391 mode = CCmode;
5393 if (reverse)
5394 code = reverse_condition (code);
5396 switch (code)
5398 case EQ:
5399 suffix = "e";
5400 break;
5401 case NE:
5402 suffix = "ne";
5403 break;
5404 case GT:
5405 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
5406 abort ();
5407 suffix = "g";
5408 break;
5409 case GTU:
5410 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
5411 Those same assemblers have the same but opposite lossage on cmov. */
5412 if (mode != CCmode)
5413 abort ();
5414 suffix = fp ? "nbe" : "a";
5415 break;
5416 case LT:
5417 if (mode == CCNOmode || mode == CCGOCmode)
5418 suffix = "s";
5419 else if (mode == CCmode || mode == CCGCmode)
5420 suffix = "l";
5421 else
5422 abort ();
5423 break;
5424 case LTU:
5425 if (mode != CCmode)
5426 abort ();
5427 suffix = "b";
5428 break;
5429 case GE:
5430 if (mode == CCNOmode || mode == CCGOCmode)
5431 suffix = "ns";
5432 else if (mode == CCmode || mode == CCGCmode)
5433 suffix = "ge";
5434 else
5435 abort ();
5436 break;
5437 case GEU:
5438 /* ??? As above. */
5439 if (mode != CCmode)
5440 abort ();
5441 suffix = fp ? "nb" : "ae";
5442 break;
5443 case LE:
5444 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
5445 abort ();
5446 suffix = "le";
5447 break;
5448 case LEU:
5449 if (mode != CCmode)
5450 abort ();
5451 suffix = "be";
5452 break;
5453 case UNORDERED:
5454 suffix = fp ? "u" : "p";
5455 break;
5456 case ORDERED:
5457 suffix = fp ? "nu" : "np";
5458 break;
5459 default:
5460 abort ();
5462 fputs (suffix, file);
5465 void
5466 print_reg (x, code, file)
5467 rtx x;
5468 int code;
5469 FILE *file;
5471 if (REGNO (x) == ARG_POINTER_REGNUM
5472 || REGNO (x) == FRAME_POINTER_REGNUM
5473 || REGNO (x) == FLAGS_REG
5474 || REGNO (x) == FPSR_REG)
5475 abort ();
5477 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
5478 putc ('%', file);
5480 if (code == 'w' || MMX_REG_P (x))
5481 code = 2;
5482 else if (code == 'b')
5483 code = 1;
5484 else if (code == 'k')
5485 code = 4;
5486 else if (code == 'q')
5487 code = 8;
5488 else if (code == 'y')
5489 code = 3;
5490 else if (code == 'h')
5491 code = 0;
5492 else
5493 code = GET_MODE_SIZE (GET_MODE (x));
5495 /* Irritatingly, AMD extended registers use a different naming convention
5496 from the normal registers. */
5497 if (REX_INT_REG_P (x))
5499 if (!TARGET_64BIT)
5500 abort ();
5501 switch (code)
5503 case 0:
5504 error ("extended registers have no high halves");
5505 break;
5506 case 1:
5507 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5508 break;
5509 case 2:
5510 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5511 break;
5512 case 4:
5513 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5514 break;
5515 case 8:
5516 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5517 break;
5518 default:
5519 error ("unsupported operand size for extended register");
5520 break;
5522 return;
5524 switch (code)
5526 case 3:
5527 if (STACK_TOP_P (x))
5529 fputs ("st(0)", file);
5530 break;
5532 /* FALLTHRU */
5533 case 8:
5534 case 4:
5535 case 12:
5536 if (! ANY_FP_REG_P (x))
5537 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5538 /* FALLTHRU */
5539 case 16:
5540 case 2:
5541 fputs (hi_reg_name[REGNO (x)], file);
5542 break;
5543 case 1:
5544 fputs (qi_reg_name[REGNO (x)], file);
5545 break;
5546 case 0:
5547 fputs (qi_high_reg_name[REGNO (x)], file);
5548 break;
5549 default:
5550 abort ();
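/* For example, for (reg:SI 0) (i.e. %eax) the codes above select
   "%al" for 'b', "%ax" for 'w', "%eax" for 'k' and, on x86-64,
   "%rax" for 'q'.  */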
5554 /* Meaning of CODE:
5555 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5556 C -- print opcode suffix for set/cmov insn.
5557 c -- like C, but print reversed condition
5558 F,f -- likewise, but for floating-point.
5559 R -- print the prefix for register names.
5560 z -- print the opcode suffix for the size of the current operand.
5561 * -- print a star (in certain assembler syntax)
5562 A -- print an absolute memory reference.
5563 w -- print the operand as if it's a "word" (HImode) even if it isn't.
5564 s -- print a shift double count, followed by the assembler's argument
5565 delimiter.
5566 b -- print the QImode name of the register for the indicated operand.
5567 %b0 would print %al if operands[0] is reg 0.
5568 w -- likewise, print the HImode name of the register.
5569 k -- likewise, print the SImode name of the register.
5570 q -- likewise, print the DImode name of the register.
5571 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5572 y -- print "st(0)" instead of "st" as a register.
5573 D -- print condition for SSE cmp instruction.
5574 P -- if PIC, print an @PLT suffix.
5575 X -- don't print any sort of PIC '@' suffix for a symbol.
5578 void
5579 print_operand (file, x, code)
5580 FILE *file;
5581 rtx x;
5582 int code;
5584 if (code)
5586 switch (code)
5588 case '*':
5589 if (ASSEMBLER_DIALECT == 0)
5590 putc ('*', file);
5591 return;
5593 case 'A':
5594 if (ASSEMBLER_DIALECT == 0)
5595 putc ('*', file);
5596 else if (ASSEMBLER_DIALECT == 1)
5598 /* Intel syntax. For absolute addresses, registers should not
5599 be surrounded by braces. */
5600 if (GET_CODE (x) != REG)
5602 putc ('[', file);
5603 PRINT_OPERAND (file, x, 0);
5604 putc (']', file);
5605 return;
5609 PRINT_OPERAND (file, x, 0);
5610 return;
5613 case 'L':
5614 if (ASSEMBLER_DIALECT == 0)
5615 putc ('l', file);
5616 return;
5618 case 'W':
5619 if (ASSEMBLER_DIALECT == 0)
5620 putc ('w', file);
5621 return;
5623 case 'B':
5624 if (ASSEMBLER_DIALECT == 0)
5625 putc ('b', file);
5626 return;
5628 case 'Q':
5629 if (ASSEMBLER_DIALECT == 0)
5630 putc ('l', file);
5631 return;
5633 case 'S':
5634 if (ASSEMBLER_DIALECT == 0)
5635 putc ('s', file);
5636 return;
5638 case 'T':
5639 if (ASSEMBLER_DIALECT == 0)
5640 putc ('t', file);
5641 return;
5643 case 'z':
5644 /* 387 opcodes don't get size suffixes if the operands are
5645 registers. */
5647 if (STACK_REG_P (x))
5648 return;
5650 /* Derive the opcode suffix from the size of the operand. */
5651 switch (GET_MODE_SIZE (GET_MODE (x)))
5653 case 2:
5654 #ifdef HAVE_GAS_FILDS_FISTS
5655 putc ('s', file);
5656 #endif
5657 return;
5659 case 4:
5660 if (GET_MODE (x) == SFmode)
5662 putc ('s', file);
5663 return;
5665 else
5666 putc ('l', file);
5667 return;
5669 case 12:
5670 case 16:
5671 putc ('t', file);
5672 return;
5674 case 8:
5675 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5677 #ifdef GAS_MNEMONICS
5678 putc ('q', file);
5679 #else
5680 putc ('l', file);
5681 putc ('l', file);
5682 #endif
5684 else
5685 putc ('l', file);
5686 return;
5688 default:
5689 abort ();
5692 case 'b':
5693 case 'w':
5694 case 'k':
5695 case 'q':
5696 case 'h':
5697 case 'y':
5698 case 'X':
5699 case 'P':
5700 break;
5702 case 's':
5703 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5705 PRINT_OPERAND (file, x, 0);
5706 putc (',', file);
5708 return;
5710 case 'D':
5711 /* A little bit of braindamage here. The SSE compare instructions
5712 use completely different names for the comparisons than the
5713 fp conditional moves do. */
5714 switch (GET_CODE (x))
5716 case EQ:
5717 case UNEQ:
5718 fputs ("eq", file);
5719 break;
5720 case LT:
5721 case UNLT:
5722 fputs ("lt", file);
5723 break;
5724 case LE:
5725 case UNLE:
5726 fputs ("le", file);
5727 break;
5728 case UNORDERED:
5729 fputs ("unord", file);
5730 break;
5731 case NE:
5732 case LTGT:
5733 fputs ("neq", file);
5734 break;
5735 case UNGE:
5736 case GE:
5737 fputs ("nlt", file);
5738 break;
5739 case UNGT:
5740 case GT:
5741 fputs ("nle", file);
5742 break;
5743 case ORDERED:
5744 fputs ("ord", file);
5745 break;
5746 default:
5747 abort ();
5748 break;
5750 return;
5751 case 'C':
5752 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
5753 return;
5754 case 'F':
5755 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
5756 return;
5758 /* Like above, but reverse condition */
5759 case 'c':
5760 /* Check to see if argument to %c is really a constant
5761 and not a condition code which needs to be reversed. */
5762 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
5764 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
5765 return;
5767 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5768 return;
5769 case 'f':
5770 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
5771 return;
5772 case '+':
5774 rtx x;
5776 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5777 return;
5779 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5780 if (x)
5782 int pred_val = INTVAL (XEXP (x, 0));
5784 if (pred_val < REG_BR_PROB_BASE * 45 / 100
5785 || pred_val > REG_BR_PROB_BASE * 55 / 100)
5787 int taken = pred_val > REG_BR_PROB_BASE / 2;
5788 int cputaken = final_forward_branch_p (current_output_insn) == 0;
5790 /* Emit hints only in case the default branch prediction
5791 heuristics would fail. */
5792 if (taken != cputaken)
5794 /* We use 3e (DS) prefix for taken branches and
5795 2e (CS) prefix for not taken branches. */
5796 if (taken)
5797 fputs ("ds ; ", file);
5798 else
5799 fputs ("cs ; ", file);
5803 return;
5805 default:
5807 char str[50];
5808 sprintf (str, "invalid operand code `%c'", code);
5809 output_operand_lossage (str);
5814 if (GET_CODE (x) == REG)
5816 PRINT_REG (x, code, file);
5819 else if (GET_CODE (x) == MEM)
5821 /* No `byte ptr' prefix for call instructions. */
5822 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
5824 const char * size;
5825 switch (GET_MODE_SIZE (GET_MODE (x)))
5827 case 1: size = "BYTE"; break;
5828 case 2: size = "WORD"; break;
5829 case 4: size = "DWORD"; break;
5830 case 8: size = "QWORD"; break;
5831 case 12: size = "XWORD"; break;
5832 case 16: size = "XMMWORD"; break;
5833 default:
5834 abort ();
5837 /* Check for explicit size override (codes 'b', 'w' and 'k') */
5838 if (code == 'b')
5839 size = "BYTE";
5840 else if (code == 'w')
5841 size = "WORD";
5842 else if (code == 'k')
5843 size = "DWORD";
5845 fputs (size, file);
5846 fputs (" PTR ", file);
5849 x = XEXP (x, 0);
5850 if (flag_pic && CONSTANT_ADDRESS_P (x))
5851 output_pic_addr_const (file, x, code);
5852 /* Avoid (%rip) for call operands. */
5853 else if (CONSTANT_ADDRESS_P (x) && code =='P'
5854 && GET_CODE (x) != CONST_INT)
5855 output_addr_const (file, x);
5856 else
5857 output_address (x);
5860 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
5862 REAL_VALUE_TYPE r;
5863 long l;
5865 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5866 REAL_VALUE_TO_TARGET_SINGLE (r, l);
5868 if (ASSEMBLER_DIALECT == 0)
5869 putc ('$', file);
5870 fprintf (file, "0x%lx", l);
5873 /* These float cases don't actually occur as immediate operands. */
5874 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5876 REAL_VALUE_TYPE r;
5877 char dstr[30];
5879 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5880 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5881 fprintf (file, "%s", dstr);
5884 else if (GET_CODE (x) == CONST_DOUBLE
5885 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
5887 REAL_VALUE_TYPE r;
5888 char dstr[30];
5890 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5891 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5892 fprintf (file, "%s", dstr);
5894 else
5896 if (code != 'P')
5898 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
5900 if (ASSEMBLER_DIALECT == 0)
5901 putc ('$', file);
5903 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
5904 || GET_CODE (x) == LABEL_REF)
5906 if (ASSEMBLER_DIALECT == 0)
5907 putc ('$', file);
5908 else
5909 fputs ("OFFSET FLAT:", file);
5912 if (GET_CODE (x) == CONST_INT)
5913 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5914 else if (flag_pic)
5915 output_pic_addr_const (file, x, code);
5916 else
5917 output_addr_const (file, x);
5921 /* Print a memory operand whose address is ADDR. */
5923 void
5924 print_operand_address (file, addr)
5925 FILE *file;
5926 register rtx addr;
5928 struct ix86_address parts;
5929 rtx base, index, disp;
5930 int scale;
5932 if (! ix86_decompose_address (addr, &parts))
5933 abort ();
5935 base = parts.base;
5936 index = parts.index;
5937 disp = parts.disp;
5938 scale = parts.scale;
5940 if (!base && !index)
5942 /* Displacement-only addresses require special attention. */
5944 if (GET_CODE (disp) == CONST_INT)
5946 if (ASSEMBLER_DIALECT != 0)
5948 if (USER_LABEL_PREFIX[0] == 0)
5949 putc ('%', file);
5950 fputs ("ds:", file);
5952 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
5954 else if (flag_pic)
5955 output_pic_addr_const (file, addr, 0);
5956 else
5957 output_addr_const (file, addr);
5959 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
5960 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
5961 fputs ("(%rip)", file);
5963 else
5965 if (ASSEMBLER_DIALECT == 0)
5967 if (disp)
5969 if (flag_pic)
5970 output_pic_addr_const (file, disp, 0);
5971 else if (GET_CODE (disp) == LABEL_REF)
5972 output_asm_label (disp);
5973 else
5974 output_addr_const (file, disp);
5977 putc ('(', file);
5978 if (base)
5979 PRINT_REG (base, 0, file);
5980 if (index)
5982 putc (',', file);
5983 PRINT_REG (index, 0, file);
5984 if (scale != 1)
5985 fprintf (file, ",%d", scale);
5987 putc (')', file);
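/* The AT&T form printed above is disp(base,index,scale), e.g.
   "-8(%ebp,%eax,4)" for the address ebp + eax*4 - 8.  */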
5989 else
5991 rtx offset = NULL_RTX;
5993 if (disp)
5995 /* Pull out the offset of a symbol; print any symbol itself. */
5996 if (GET_CODE (disp) == CONST
5997 && GET_CODE (XEXP (disp, 0)) == PLUS
5998 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6000 offset = XEXP (XEXP (disp, 0), 1);
6001 disp = gen_rtx_CONST (VOIDmode,
6002 XEXP (XEXP (disp, 0), 0));
6005 if (flag_pic)
6006 output_pic_addr_const (file, disp, 0);
6007 else if (GET_CODE (disp) == LABEL_REF)
6008 output_asm_label (disp);
6009 else if (GET_CODE (disp) == CONST_INT)
6010 offset = disp;
6011 else
6012 output_addr_const (file, disp);
6015 putc ('[', file);
6016 if (base)
6018 PRINT_REG (base, 0, file);
6019 if (offset)
6021 if (INTVAL (offset) >= 0)
6022 putc ('+', file);
6023 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6026 else if (offset)
6027 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6028 else
6029 putc ('0', file);
6031 if (index)
6033 putc ('+', file);
6034 PRINT_REG (index, 0, file);
6035 if (scale != 1)
6036 fprintf (file, "*%d", scale);
6038 putc (']', file);
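/* The Intel form printed above is [base+disp+index*scale], e.g.
   "[ebp-8+eax*4]" for the same address that AT&T syntax renders as
   "-8(%ebp,%eax,4)".  */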
6043 /* Split one or more DImode RTL references into pairs of SImode
6044 references. The RTL can be REG, offsettable MEM, integer constant, or
6045 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6046 split and "num" is its length. lo_half and hi_half are output arrays
6047 that parallel "operands". */
6049 void
6050 split_di (operands, num, lo_half, hi_half)
6051 rtx operands[];
6052 int num;
6053 rtx lo_half[], hi_half[];
6055 while (num--)
6057 rtx op = operands[num];
6059 /* simplify_subreg refuses to split volatile memory addresses,
6060 but we still have to handle them. */
6061 if (GET_CODE (op) == MEM)
6063 lo_half[num] = adjust_address (op, SImode, 0);
6064 hi_half[num] = adjust_address (op, SImode, 4);
6066 else
6068 lo_half[num] = simplify_gen_subreg (SImode, op,
6069 GET_MODE (op) == VOIDmode
6070 ? DImode : GET_MODE (op), 0);
6071 hi_half[num] = simplify_gen_subreg (SImode, op,
6072 GET_MODE (op) == VOIDmode
6073 ? DImode : GET_MODE (op), 4);
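/* For example, splitting a DImode constant such as
   0x1122334455667788 yields lo_half 0x55667788 and hi_half
   0x11223344, while a MEM is split into the original address and
   the address plus 4.  */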
6077 /* Split one or more TImode RTL references into pairs of SImode
6078 references. The RTL can be REG, offsettable MEM, integer constant, or
6079 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6080 split and "num" is its length. lo_half and hi_half are output arrays
6081 that parallel "operands". */
6083 void
6084 split_ti (operands, num, lo_half, hi_half)
6085 rtx operands[];
6086 int num;
6087 rtx lo_half[], hi_half[];
6089 while (num--)
6091 rtx op = operands[num];
6093 /* simplify_subreg refuses to split volatile memory addresses, but we
6094 still have to handle them. */
6095 if (GET_CODE (op) == MEM)
6097 lo_half[num] = adjust_address (op, DImode, 0);
6098 hi_half[num] = adjust_address (op, DImode, 8);
6100 else
6102 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6103 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6108 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6109 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6110 is the expression of the binary operation. The output may either be
6111 emitted here, or returned to the caller, like all output_* functions.
6113 There is no guarantee that the operands are the same mode, as they
6114 might be within FLOAT or FLOAT_EXTEND expressions. */
6116 #ifndef SYSV386_COMPAT
6117 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6118 wants to fix the assemblers because that causes incompatibility
6119 with gcc. No-one wants to fix gcc because that causes
6120 incompatibility with assemblers... You can use the option of
6121 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6122 #define SYSV386_COMPAT 1
6123 #endif
6125 const char *
6126 output_387_binary_op (insn, operands)
6127 rtx insn;
6128 rtx *operands;
6130 static char buf[30];
6131 const char *p;
6132 const char *ssep;
6133 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6135 #ifdef ENABLE_CHECKING
6136 /* Even if we do not want to check the inputs, this documents the input
6137 constraints, which helps in understanding the following code. */
6138 if (STACK_REG_P (operands[0])
6139 && ((REG_P (operands[1])
6140 && REGNO (operands[0]) == REGNO (operands[1])
6141 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6142 || (REG_P (operands[2])
6143 && REGNO (operands[0]) == REGNO (operands[2])
6144 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6145 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6146 ; /* ok */
6147 else if (!is_sse)
6148 abort ();
6149 #endif
6151 switch (GET_CODE (operands[3]))
6153 case PLUS:
6154 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6155 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6156 p = "fiadd";
6157 else
6158 p = "fadd";
6159 ssep = "add";
6160 break;
6162 case MINUS:
6163 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6164 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6165 p = "fisub";
6166 else
6167 p = "fsub";
6168 ssep = "sub";
6169 break;
6171 case MULT:
6172 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6173 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6174 p = "fimul";
6175 else
6176 p = "fmul";
6177 ssep = "mul";
6178 break;
6180 case DIV:
6181 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6182 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6183 p = "fidiv";
6184 else
6185 p = "fdiv";
6186 ssep = "div";
6187 break;
6189 default:
6190 abort ();
6193 if (is_sse)
6195 strcpy (buf, ssep);
6196 if (GET_MODE (operands[0]) == SFmode)
6197 strcat (buf, "ss\t{%2, %0|%0, %2}");
6198 else
6199 strcat (buf, "sd\t{%2, %0|%0, %2}");
6200 return buf;
6202 strcpy (buf, p);
6204 switch (GET_CODE (operands[3]))
6206 case MULT:
6207 case PLUS:
6208 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6210 rtx temp = operands[2];
6211 operands[2] = operands[1];
6212 operands[1] = temp;
6215 /* We now know that operands[0] == operands[1]. */
6217 if (GET_CODE (operands[2]) == MEM)
6219 p = "%z2\t%2";
6220 break;
6223 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6225 if (STACK_TOP_P (operands[0]))
6226 /* How is it that we are storing to a dead operand[2]?
6227 Well, presumably operands[1] is dead too. We can't
6228 store the result to st(0) as st(0) gets popped on this
6229 instruction. Instead store to operands[2] (which I
6230 think has to be st(1)). st(1) will be popped later.
6231 gcc <= 2.8.1 didn't have this check and generated
6232 assembly code that the Unixware assembler rejected. */
6233 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6234 else
6235 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6236 break;
6239 if (STACK_TOP_P (operands[0]))
6240 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6241 else
6242 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6243 break;
6245 case MINUS:
6246 case DIV:
6247 if (GET_CODE (operands[1]) == MEM)
6249 p = "r%z1\t%1";
6250 break;
6253 if (GET_CODE (operands[2]) == MEM)
6255 p = "%z2\t%2";
6256 break;
6259 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6261 #if SYSV386_COMPAT
6262 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6263 derived assemblers, confusingly reverse the direction of
6264 the operation for fsub{r} and fdiv{r} when the
6265 destination register is not st(0). The Intel assembler
6266 doesn't have this brain damage. Read !SYSV386_COMPAT to
6267 figure out what the hardware really does. */
6268 if (STACK_TOP_P (operands[0]))
6269 p = "{p\t%0, %2|rp\t%2, %0}";
6270 else
6271 p = "{rp\t%2, %0|p\t%0, %2}";
6272 #else
6273 if (STACK_TOP_P (operands[0]))
6274 /* As above for fmul/fadd, we can't store to st(0). */
6275 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6276 else
6277 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6278 #endif
6279 break;
6282 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6284 #if SYSV386_COMPAT
6285 if (STACK_TOP_P (operands[0]))
6286 p = "{rp\t%0, %1|p\t%1, %0}";
6287 else
6288 p = "{p\t%1, %0|rp\t%0, %1}";
6289 #else
6290 if (STACK_TOP_P (operands[0]))
6291 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6292 else
6293 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6294 #endif
6295 break;
6298 if (STACK_TOP_P (operands[0]))
6300 if (STACK_TOP_P (operands[1]))
6301 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6302 else
6303 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
6304 break;
6306 else if (STACK_TOP_P (operands[1]))
6308 #if SYSV386_COMPAT
6309 p = "{\t%1, %0|r\t%0, %1}";
6310 #else
6311 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
6312 #endif
6314 else
6316 #if SYSV386_COMPAT
6317 p = "{r\t%2, %0|\t%0, %2}";
6318 #else
6319 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6320 #endif
6322 break;
6324 default:
6325 abort ();
6328 strcat (buf, p);
6329 return buf;
6332 /* Output code to initialize control word copies used by
6333 trunc?f?i patterns. NORMAL is set to the current control word, while ROUND_DOWN
6334 is set to a control word that (despite the name) rounds toward zero, as truncation requires. */
6335 void
6336 emit_i387_cw_initialization (normal, round_down)
6337 rtx normal, round_down;
6339 rtx reg = gen_reg_rtx (HImode);
6341 emit_insn (gen_x86_fnstcw_1 (normal));
6342 emit_move_insn (reg, normal);
6343 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6344 && !TARGET_64BIT)
6345 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6346 else
6347 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
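/* Either way, bits 10-11 of the 387 control word -- the rounding
   control field -- end up set to 11, selecting round toward zero as
   the truncating conversions require.  (The insv form rewrites the
   whole high byte to 0xc, which also clears the precision control
   bits; presumably harmless for this use.)  */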
6348 emit_move_insn (round_down, reg);
6351 /* Output code for INSN to convert a float to a signed int. OPERANDS
6352 are the insn operands. The output may be [HSD]Imode and the input
6353 operand may be [SDX]Fmode. */
6355 const char *
6356 output_fix_trunc (insn, operands)
6357 rtx insn;
6358 rtx *operands;
6360 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6361 int dimode_p = GET_MODE (operands[0]) == DImode;
6363 /* Jump through a hoop or two for DImode, since the hardware has no
6364 non-popping instruction. We used to do this a different way, but
6365 that was somewhat fragile and broke with post-reload splitters. */
6366 if (dimode_p && !stack_top_dies)
6367 output_asm_insn ("fld\t%y1", operands);
6369 if (!STACK_TOP_P (operands[1]))
6370 abort ();
6372 if (GET_CODE (operands[0]) != MEM)
6373 abort ();
6375 output_asm_insn ("fldcw\t%3", operands);
6376 if (stack_top_dies || dimode_p)
6377 output_asm_insn ("fistp%z0\t%0", operands);
6378 else
6379 output_asm_insn ("fist%z0\t%0", operands);
6380 output_asm_insn ("fldcw\t%2", operands);
6382 return "";
6385 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6386 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6387 when fucom should be used. */
6389 const char *
6390 output_fp_compare (insn, operands, eflags_p, unordered_p)
6391 rtx insn;
6392 rtx *operands;
6393 int eflags_p, unordered_p;
6395 int stack_top_dies;
6396 rtx cmp_op0 = operands[0];
6397 rtx cmp_op1 = operands[1];
6398 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
6400 if (eflags_p == 2)
6402 cmp_op0 = cmp_op1;
6403 cmp_op1 = operands[2];
6405 if (is_sse)
6407 if (GET_MODE (operands[0]) == SFmode)
6408 if (unordered_p)
6409 return "ucomiss\t{%1, %0|%0, %1}";
6410 else
6411 return "comiss\t{%1, %0|%0, %y}";
6412 else
6413 if (unordered_p)
6414 return "ucomisd\t{%1, %0|%0, %1}";
6415 else
6416 return "comisd\t{%1, %0|%0, %y}";
6419 if (! STACK_TOP_P (cmp_op0))
6420 abort ();
6422 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6424 if (STACK_REG_P (cmp_op1)
6425 && stack_top_dies
6426 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6427 && REGNO (cmp_op1) != FIRST_STACK_REG)
6429 /* If the top of the 387 stack dies, and the other operand
6430 is also a stack register that dies, then this must be a
6431 `fcompp' float compare. */
6433 if (eflags_p == 1)
6435 /* There is no double popping fcomi variant. Fortunately,
6436 eflags is immune from the fstp's cc clobbering. */
6437 if (unordered_p)
6438 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6439 else
6440 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6441 return "fstp\t%y0";
6443 else
6445 if (eflags_p == 2)
6447 if (unordered_p)
6448 return "fucompp\n\tfnstsw\t%0";
6449 else
6450 return "fcompp\n\tfnstsw\t%0";
6452 else
6454 if (unordered_p)
6455 return "fucompp";
6456 else
6457 return "fcompp";
6461 else
6463 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
6465 static const char * const alt[24] =
6467 "fcom%z1\t%y1",
6468 "fcomp%z1\t%y1",
6469 "fucom%z1\t%y1",
6470 "fucomp%z1\t%y1",
6472 "ficom%z1\t%y1",
6473 "ficomp%z1\t%y1",
6474 NULL,
6475 NULL,
6477 "fcomi\t{%y1, %0|%0, %y1}",
6478 "fcomip\t{%y1, %0|%0, %y1}",
6479 "fucomi\t{%y1, %0|%0, %y1}",
6480 "fucomip\t{%y1, %0|%0, %y1}",
6482 NULL,
6483 NULL,
6484 NULL,
6485 NULL,
6487 "fcom%z2\t%y2\n\tfnstsw\t%0",
6488 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6489 "fucom%z2\t%y2\n\tfnstsw\t%0",
6490 "fucomp%z2\t%y2\n\tfnstsw\t%0",
6492 "ficom%z2\t%y2\n\tfnstsw\t%0",
6493 "ficomp%z2\t%y2\n\tfnstsw\t%0",
6494 NULL,
6495 NULL
6498 int mask;
6499 const char *ret;
6501 mask = eflags_p << 3;
6502 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6503 mask |= unordered_p << 1;
6504 mask |= stack_top_dies;
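/* A worked example: mask 9 selects "fcomip" above -- eflags_p == 1
   contributes 1 << 3, the operand is FP (int bit clear), the compare
   is ordered (unordered bit clear), and the stack top dies (low bit
   set).  */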
6506 if (mask >= 24)
6507 abort ();
6508 ret = alt[mask];
6509 if (ret == NULL)
6510 abort ();
6512 return ret;
6516 void
6517 ix86_output_addr_vec_elt (file, value)
6518 FILE *file;
6519 int value;
6521 const char *directive = ASM_LONG;
6523 if (TARGET_64BIT)
6525 #ifdef ASM_QUAD
6526 directive = ASM_QUAD;
6527 #else
6528 abort ();
6529 #endif
6532 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
6535 void
6536 ix86_output_addr_diff_elt (file, value, rel)
6537 FILE *file;
6538 int value, rel;
6540 if (TARGET_64BIT)
6541 fprintf (file, "%s%s%d-.+4+(.-%s%d)\n",
6542 ASM_LONG, LPREFIX, value, LPREFIX, rel);
6543 else if (HAVE_AS_GOTOFF_IN_DATA)
6544 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
6545 else
6546 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
6547 ASM_LONG, LPREFIX, value);
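/* For example, when the assembler supports GOTOFF in data, each
   32-bit jump-table entry comes out as a line like
   ".long .L3@GOTOFF" (exact spelling depends on ASM_LONG and
   LPREFIX).  */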
6550 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
6551 for the target. */
6553 void
6554 ix86_expand_clear (dest)
6555 rtx dest;
6557 rtx tmp;
6559 /* We play register width games, which are only valid after reload. */
6560 if (!reload_completed)
6561 abort ();
6563 /* Avoid HImode and its attendant prefix byte. */
6564 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
6565 dest = gen_rtx_REG (SImode, REGNO (dest));
6567 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
6569 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
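/* "xorl %eax, %eax" is 2 bytes where "movl $0, %eax" is 5, but
   unlike mov it clobbers the flags, hence the CLOBBER attached
   below.  */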
6570 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
6572 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
6573 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6576 emit_insn (tmp);
6579 void
6580 ix86_expand_move (mode, operands)
6581 enum machine_mode mode;
6582 rtx operands[];
6584 int strict = (reload_in_progress || reload_completed);
6585 rtx insn;
6587 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
6589 /* Emit insns to move operands[1] into operands[0]. */
6591 if (GET_CODE (operands[0]) == MEM)
6592 operands[1] = force_reg (Pmode, operands[1]);
6593 else
6595 rtx temp = operands[0];
6596 if (GET_CODE (temp) != REG)
6597 temp = gen_reg_rtx (Pmode);
6598 temp = legitimize_pic_address (operands[1], temp);
6599 if (temp == operands[0])
6600 return;
6601 operands[1] = temp;
6604 else
6606 if (GET_CODE (operands[0]) == MEM
6607 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
6608 || !push_operand (operands[0], mode))
6609 && GET_CODE (operands[1]) == MEM)
6610 operands[1] = force_reg (mode, operands[1]);
6612 if (push_operand (operands[0], mode)
6613 && ! general_no_elim_operand (operands[1], mode))
6614 operands[1] = copy_to_mode_reg (mode, operands[1]);
6616 /* Force large constants in 64-bit compilation into a register
6617 so they get CSEed. */
6618 if (TARGET_64BIT && mode == DImode
6619 && immediate_operand (operands[1], mode)
6620 && !x86_64_zero_extended_value (operands[1])
6621 && !register_operand (operands[0], mode)
6622 && optimize && !reload_completed && !reload_in_progress)
6623 operands[1] = copy_to_mode_reg (mode, operands[1]);
6625 if (FLOAT_MODE_P (mode))
6627 /* If we are loading a floating point constant to a register,
6628 force the value to memory now, since we'll get better code
6629 out of the back end. */
6631 if (strict)
6633 else if (GET_CODE (operands[1]) == CONST_DOUBLE
6634 && register_operand (operands[0], mode))
6635 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
6639 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
6641 emit_insn (insn);
6644 /* Attempt to expand a binary operator. Make the expansion closer to the
6645 actual machine than just general_operand, which would allow 3 separate
6646 memory references (one output, two input) in a single insn. */
6648 void
6649 ix86_expand_binary_operator (code, mode, operands)
6650 enum rtx_code code;
6651 enum machine_mode mode;
6652 rtx operands[];
6654 int matching_memory;
6655 rtx src1, src2, dst, op, clob;
6657 dst = operands[0];
6658 src1 = operands[1];
6659 src2 = operands[2];
6661 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6662 if (GET_RTX_CLASS (code) == 'c'
6663 && (rtx_equal_p (dst, src2)
6664 || immediate_operand (src1, mode)))
6666 rtx temp = src1;
6667 src1 = src2;
6668 src2 = temp;
6671 /* If the destination is memory, and we do not have matching source
6672 operands, do things in registers. */
6673 matching_memory = 0;
6674 if (GET_CODE (dst) == MEM)
6676 if (rtx_equal_p (dst, src1))
6677 matching_memory = 1;
6678 else if (GET_RTX_CLASS (code) == 'c'
6679 && rtx_equal_p (dst, src2))
6680 matching_memory = 2;
6681 else
6682 dst = gen_reg_rtx (mode);
6685 /* Both source operands cannot be in memory. */
6686 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
6688 if (matching_memory != 2)
6689 src2 = force_reg (mode, src2);
6690 else
6691 src1 = force_reg (mode, src1);
6694 /* If the operation is not commutative, source 1 cannot be a constant
6695 or non-matching memory. */
6696 if ((CONSTANT_P (src1)
6697 || (!matching_memory && GET_CODE (src1) == MEM))
6698 && GET_RTX_CLASS (code) != 'c')
6699 src1 = force_reg (mode, src1);
6701 /* If optimizing, copy to regs to improve CSE */
6702 if (optimize && ! no_new_pseudos)
6704 if (GET_CODE (dst) == MEM)
6705 dst = gen_reg_rtx (mode);
6706 if (GET_CODE (src1) == MEM)
6707 src1 = force_reg (mode, src1);
6708 if (GET_CODE (src2) == MEM)
6709 src2 = force_reg (mode, src2);
6712 /* Emit the instruction. */
6714 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
6715 if (reload_in_progress)
6717 /* Reload doesn't know about the flags register, and doesn't know that
6718 it doesn't want to clobber it. We can only do this with PLUS. */
6719 if (code != PLUS)
6720 abort ();
6721 emit_insn (op);
6723 else
6725 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6726 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6729 /* Fix up the destination if needed. */
6730 if (dst != operands[0])
6731 emit_move_insn (operands[0], dst);
6734 /* Return TRUE or FALSE depending on whether the binary operator meets the
6735 appropriate constraints. */
6738 ix86_binary_operator_ok (code, mode, operands)
6739 enum rtx_code code;
6740 enum machine_mode mode ATTRIBUTE_UNUSED;
6741 rtx operands[3];
6743 /* Both source operands cannot be in memory. */
6744 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
6745 return 0;
6746 /* If the operation is not commutative, source 1 cannot be a constant. */
6747 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
6748 return 0;
6749 /* If the destination is memory, we must have a matching source operand. */
6750 if (GET_CODE (operands[0]) == MEM
6751 && ! (rtx_equal_p (operands[0], operands[1])
6752 || (GET_RTX_CLASS (code) == 'c'
6753 && rtx_equal_p (operands[0], operands[2]))))
6754 return 0;
6755 /* If the operation is not commutative and source 1 is memory, we must
6756 have a matching destination. */
6757 if (GET_CODE (operands[1]) == MEM
6758 && GET_RTX_CLASS (code) != 'c'
6759 && ! rtx_equal_p (operands[0], operands[1]))
6760 return 0;
6761 return 1;
6764 /* Attempt to expand a unary operator. Make the expansion closer to the
6765 actual machine than just general_operand, which would allow 2 separate
6766 memory references (one output, one input) in a single insn. */
6768 void
6769 ix86_expand_unary_operator (code, mode, operands)
6770 enum rtx_code code;
6771 enum machine_mode mode;
6772 rtx operands[];
6774 int matching_memory;
6775 rtx src, dst, op, clob;
6777 dst = operands[0];
6778 src = operands[1];
6780 /* If the destination is memory, and we do not have matching source
6781 operands, do things in registers. */
6782 matching_memory = 0;
6783 if (GET_CODE (dst) == MEM)
6785 if (rtx_equal_p (dst, src))
6786 matching_memory = 1;
6787 else
6788 dst = gen_reg_rtx (mode);
6791 /* When source operand is memory, destination must match. */
6792 if (!matching_memory && GET_CODE (src) == MEM)
6793 src = force_reg (mode, src);
6795 /* If optimizing, copy to regs to improve CSE */
6796 if (optimize && ! no_new_pseudos)
6798 if (GET_CODE (dst) == MEM)
6799 dst = gen_reg_rtx (mode);
6800 if (GET_CODE (src) == MEM)
6801 src = force_reg (mode, src);
6804 /* Emit the instruction. */
6806 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
6807 if (reload_in_progress || code == NOT)
6809 /* Reload doesn't know about the flags register, and doesn't know that
6810 it doesn't want to clobber it. */
6811 if (code != NOT)
6812 abort ();
6813 emit_insn (op);
6815 else
6817 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6818 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6821 /* Fix up the destination if needed. */
6822 if (dst != operands[0])
6823 emit_move_insn (operands[0], dst);
6826 /* Return TRUE or FALSE depending on whether the unary operator meets the
6827 appropriate constraints. */
6830 ix86_unary_operator_ok (code, mode, operands)
6831 enum rtx_code code ATTRIBUTE_UNUSED;
6832 enum machine_mode mode ATTRIBUTE_UNUSED;
6833 rtx operands[2] ATTRIBUTE_UNUSED;
6835 /* If one of the operands is memory, source and destination must match. */
6836 if ((GET_CODE (operands[0]) == MEM
6837 || GET_CODE (operands[1]) == MEM)
6838 && ! rtx_equal_p (operands[0], operands[1]))
6839 return FALSE;
6840 return TRUE;
6843 /* Return TRUE or FALSE depending on whether the first SET in INSN
6844 has source and destination with matching CC modes, and that the
6845 CC mode is at least as constrained as REQ_MODE. */
6848 ix86_match_ccmode (insn, req_mode)
6849 rtx insn;
6850 enum machine_mode req_mode;
6852 rtx set;
6853 enum machine_mode set_mode;
6855 set = PATTERN (insn);
6856 if (GET_CODE (set) == PARALLEL)
6857 set = XVECEXP (set, 0, 0);
6858 if (GET_CODE (set) != SET)
6859 abort ();
6860 if (GET_CODE (SET_SRC (set)) != COMPARE)
6861 abort ();
6863 set_mode = GET_MODE (SET_DEST (set));
6864 switch (set_mode)
6866 case CCNOmode:
6867 if (req_mode != CCNOmode
6868 && (req_mode != CCmode
6869 || XEXP (SET_SRC (set), 1) != const0_rtx))
6870 return 0;
6871 break;
6872 case CCmode:
6873 if (req_mode == CCGCmode)
6874 return 0;
6875 /* FALLTHRU */
6876 case CCGCmode:
6877 if (req_mode == CCGOCmode || req_mode == CCNOmode)
6878 return 0;
6879 /* FALLTHRU */
6880 case CCGOCmode:
6881 if (req_mode == CCZmode)
6882 return 0;
6883 /* FALLTHRU */
6884 case CCZmode:
6885 break;
6887 default:
6888 abort ();
6891 return (GET_MODE (SET_SRC (set)) == set_mode);
6894 /* Generate insn patterns to do an integer compare of OPERANDS. */
6896 static rtx
6897 ix86_expand_int_compare (code, op0, op1)
6898 enum rtx_code code;
6899 rtx op0, op1;
6901 enum machine_mode cmpmode;
6902 rtx tmp, flags;
6904 cmpmode = SELECT_CC_MODE (code, op0, op1);
6905 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
6907 /* This is very simple, but making the interface the same as in the
6908 FP case makes the rest of the code easier. */
6909 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
6910 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
6912 /* Return the test that should be put into the flags user, i.e.
6913 the bcc, scc, or cmov instruction. */
6914 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
6917 /* Figure out whether to use ordered or unordered fp comparisons.
6918 Return the appropriate mode to use. */
6920 enum machine_mode
6921 ix86_fp_compare_mode (code)
6922 enum rtx_code code ATTRIBUTE_UNUSED;
6924 /* ??? In order to make all comparisons reversible, we do all comparisons
6925 non-trapping when compiling for IEEE. Once gcc is able to distinguish
6926 all forms of trapping and nontrapping comparisons, we can make inequality
6927 comparisons trapping again, since that results in better code when using
6928 FCOM-based compares. */
6929 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
6932 enum machine_mode
6933 ix86_cc_mode (code, op0, op1)
6934 enum rtx_code code;
6935 rtx op0, op1;
6937 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
6938 return ix86_fp_compare_mode (code);
6939 switch (code)
6941 /* Only zero flag is needed. */
6942 case EQ: /* ZF=0 */
6943 case NE: /* ZF!=0 */
6944 return CCZmode;
6945 /* Codes needing carry flag. */
6946 case GEU: /* CF=0 */
6947 case GTU: /* CF=0 & ZF=0 */
6948 case LTU: /* CF=1 */
6949 case LEU: /* CF=1 | ZF=1 */
6950 return CCmode;
6951 /* Codes possibly doable only with sign flag when
6952 comparing against zero. */
6953 case GE: /* SF=OF or SF=0 */
6954 case LT: /* SF<>OF or SF=1 */
6955 if (op1 == const0_rtx)
6956 return CCGOCmode;
6957 else
6958 /* For other cases the carry flag is not required. */
6959 return CCGCmode;
6960 /* Codes doable only with the sign flag when comparing
6961 against zero, but for which we lack a jump instruction,
6962 so we use relational tests against overflow,
6963 which therefore must be zero. */
6964 case GT: /* ZF=0 & SF=OF */
6965 case LE: /* ZF=1 | SF<>OF */
6966 if (op1 == const0_rtx)
6967 return CCNOmode;
6968 else
6969 return CCGCmode;
6970 /* The strcmp pattern does (use flags), and combine may ask us for the
6971 proper mode. */
6972 case USE:
6973 return CCmode;
6974 default:
6975 abort ();
6979 /* Return true if we should use an FCOMI instruction for this fp comparison. */
6982 ix86_use_fcomi_compare (code)
6983 enum rtx_code code ATTRIBUTE_UNUSED;
6985 enum rtx_code swapped_code = swap_condition (code);
6986 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
6987 || (ix86_fp_comparison_cost (swapped_code)
6988 == ix86_fp_comparison_fcomi_cost (swapped_code)));
6991 /* Swap, force into registers, or otherwise massage the two operands
6992 to a fp comparison. The operands are updated in place; the new
6993 comparison code is returned. */
6995 static enum rtx_code
6996 ix86_prepare_fp_compare_args (code, pop0, pop1)
6997 enum rtx_code code;
6998 rtx *pop0, *pop1;
7000 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7001 rtx op0 = *pop0, op1 = *pop1;
7002 enum machine_mode op_mode = GET_MODE (op0);
7003 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7005 /* All of the unordered compare instructions only work on registers.
7006 The same is true of the XFmode compare instructions. The same is
7007 true of the fcomi compare instructions. */
7009 if (!is_sse
7010 && (fpcmp_mode == CCFPUmode
7011 || op_mode == XFmode
7012 || op_mode == TFmode
7013 || ix86_use_fcomi_compare (code)))
7015 op0 = force_reg (op_mode, op0);
7016 op1 = force_reg (op_mode, op1);
7018 else
7020 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7021 things around if they appear profitable, otherwise force op0
7022 into a register. */
7024 if (standard_80387_constant_p (op0) == 0
7025 || (GET_CODE (op0) == MEM
7026 && ! (standard_80387_constant_p (op1) == 0
7027 || GET_CODE (op1) == MEM)))
7029 rtx tmp;
7030 tmp = op0, op0 = op1, op1 = tmp;
7031 code = swap_condition (code);
7034 if (GET_CODE (op0) != REG)
7035 op0 = force_reg (op_mode, op0);
7037 if (CONSTANT_P (op1))
7039 if (standard_80387_constant_p (op1))
7040 op1 = force_reg (op_mode, op1);
7041 else
7042 op1 = validize_mem (force_const_mem (op_mode, op1));
7046 /* Try to rearrange the comparison to make it cheaper. */
7047 if (ix86_fp_comparison_cost (code)
7048 > ix86_fp_comparison_cost (swap_condition (code))
7049 && (GET_CODE (op0) == REG || !reload_completed))
7051 rtx tmp;
7052 tmp = op0, op0 = op1, op1 = tmp;
7053 code = swap_condition (code);
7054 if (GET_CODE (op0) != REG)
7055 op0 = force_reg (op_mode, op0);
7058 *pop0 = op0;
7059 *pop1 = op1;
7060 return code;
7063 /* Convert comparison codes we use to represent FP comparison to integer
7064 code that will result in proper branch. Return UNKNOWN if no such code
7065 is available. */
7066 static enum rtx_code
7067 ix86_fp_compare_code_to_integer (code)
7068 enum rtx_code code;
7070 switch (code)
7072 case GT:
7073 return GTU;
7074 case GE:
7075 return GEU;
7076 case ORDERED:
7077 case UNORDERED:
7078 return code;
7079 break;
7080 case UNEQ:
7081 return EQ;
7082 break;
7083 case UNLT:
7084 return LTU;
7085 break;
7086 case UNLE:
7087 return LEU;
7088 break;
7089 case LTGT:
7090 return NE;
7091 break;
7092 default:
7093 return UNKNOWN;
7097 /* Split comparison code CODE into comparisons we can do using branch
7098 instructions. BYPASS_CODE is the comparison code for a branch that
7099 branches around FIRST_CODE and SECOND_CODE. If one of the branches
7100 is not required, its code is set to NIL.
7101 We never require more than two branches. */
7102 static void
7103 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7104 enum rtx_code code, *bypass_code, *first_code, *second_code;
7106 *first_code = code;
7107 *bypass_code = NIL;
7108 *second_code = NIL;
7110 /* The fcomi comparison sets flags as follows:
7112 cmp ZF PF CF
7113 > 0 0 0
7114 < 0 0 1
7115 = 1 0 0
7116 un 1 1 1 */
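/* Since an unordered result sets CF (and ZF), a bare CF-based test
   such as LT would also fire for NaN operands; this is why LT, LE
   and EQ are given an UNORDERED bypass branch below when IEEE
   conformance is required.  */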
7118 switch (code)
7120 case GT: /* GTU - CF=0 & ZF=0 */
7121 case GE: /* GEU - CF=0 */
7122 case ORDERED: /* PF=0 */
7123 case UNORDERED: /* PF=1 */
7124 case UNEQ: /* EQ - ZF=1 */
7125 case UNLT: /* LTU - CF=1 */
7126 case UNLE: /* LEU - CF=1 | ZF=1 */
7127 case LTGT: /* EQ - ZF=0 */
7128 break;
7129 case LT: /* LTU - CF=1 - fails on unordered */
7130 *first_code = UNLT;
7131 *bypass_code = UNORDERED;
7132 break;
7133 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7134 *first_code = UNLE;
7135 *bypass_code = UNORDERED;
7136 break;
7137 case EQ: /* EQ - ZF=1 - fails on unordered */
7138 *first_code = UNEQ;
7139 *bypass_code = UNORDERED;
7140 break;
7141 case NE: /* NE - ZF=0 - fails on unordered */
7142 *first_code = LTGT;
7143 *second_code = UNORDERED;
7144 break;
7145 case UNGE: /* GEU - CF=0 - fails on unordered */
7146 *first_code = GE;
7147 *second_code = UNORDERED;
7148 break;
7149 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7150 *first_code = GT;
7151 *second_code = UNORDERED;
7152 break;
7153 default:
7154 abort ();
7156 if (!TARGET_IEEE_FP)
7158 *second_code = NIL;
7159 *bypass_code = NIL;
7163 /* Return the cost of a comparison done with fcom plus arithmetic operations on AX.
7164 All following functions use the number of instructions as the cost metric.
7165 In the future this should be tweaked to compute bytes for optimize_size and
7166 to take into account the performance of various instructions on various CPUs. */
7167 static int
7168 ix86_fp_comparison_arithmetics_cost (code)
7169 enum rtx_code code;
7171 if (!TARGET_IEEE_FP)
7172 return 4;
7173 /* The cost of code output by ix86_expand_fp_compare. */
7174 switch (code)
7176 case UNLE:
7177 case UNLT:
7178 case LTGT:
7179 case GT:
7180 case GE:
7181 case UNORDERED:
7182 case ORDERED:
7183 case UNEQ:
7184 return 4;
7185 break;
7186 case LT:
7187 case NE:
7188 case EQ:
7189 case UNGE:
7190 return 5;
7191 break;
7192 case LE:
7193 case UNGT:
7194 return 6;
7195 break;
7196 default:
7197 abort ();
7201 /* Return cost of comparison done using fcomi operation.
7202 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7203 static int
7204 ix86_fp_comparison_fcomi_cost (code)
7205 enum rtx_code code;
7207 enum rtx_code bypass_code, first_code, second_code;
7208 /* Return an arbitrarily high cost when the instruction is not supported - this
7209 prevents gcc from using it. */
7210 if (!TARGET_CMOVE)
7211 return 1024;
7212 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7213 return (bypass_code != NIL || second_code != NIL) + 2;
7216 /* Return cost of comparison done using sahf operation.
7217 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7218 static int
7219 ix86_fp_comparison_sahf_cost (code)
7220 enum rtx_code code;
7222 enum rtx_code bypass_code, first_code, second_code;
7223 /* Return an arbitrarily high cost when the instruction is not preferred - this
7224 keeps gcc from using it. */
7225 if (!TARGET_USE_SAHF && !optimize_size)
7226 return 1024;
7227 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7228 return (bypass_code != NIL || second_code != NIL) + 3;
7231 /* Compute cost of the comparison done using any method.
7232 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7233 static int
7234 ix86_fp_comparison_cost (code)
7235 enum rtx_code code;
7237 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7238 int min;
7240 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7241 sahf_cost = ix86_fp_comparison_sahf_cost (code);
7243 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7244 if (min > sahf_cost)
7245 min = sahf_cost;
7246 if (min > fcomi_cost)
7247 min = fcomi_cost;
7248 return min;
7251 /* Generate insn patterns to do a floating point compare of OPERANDS. */
7253 static rtx
7254 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
7255 enum rtx_code code;
7256 rtx op0, op1, scratch;
7257 rtx *second_test;
7258 rtx *bypass_test;
7260 enum machine_mode fpcmp_mode, intcmp_mode;
7261 rtx tmp, tmp2;
7262 int cost = ix86_fp_comparison_cost (code);
7263 enum rtx_code bypass_code, first_code, second_code;
7265 fpcmp_mode = ix86_fp_compare_mode (code);
7266 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7268 if (second_test)
7269 *second_test = NULL_RTX;
7270 if (bypass_test)
7271 *bypass_test = NULL_RTX;
7273 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7275 /* Do fcomi/sahf based test when profitable. */
7276 if ((bypass_code == NIL || bypass_test)
7277 && (second_code == NIL || second_test)
7278 && ix86_fp_comparison_arithmetics_cost (code) > cost)
7280 if (TARGET_CMOVE)
7282 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7283 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
7284 tmp);
7285 emit_insn (tmp);
7287 else
7289 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7290 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7291 if (!scratch)
7292 scratch = gen_reg_rtx (HImode);
7293 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7294 emit_insn (gen_x86_sahf_1 (scratch));
7297 /* The FP codes work out to act like unsigned. */
7298 intcmp_mode = fpcmp_mode;
7299 code = first_code;
7300 if (bypass_code != NIL)
7301 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7302 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7303 const0_rtx);
7304 if (second_code != NIL)
7305 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7306 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7307 const0_rtx);
7309 else
7311 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
7312 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7313 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7314 if (!scratch)
7315 scratch = gen_reg_rtx (HImode);
7316 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7318 /* In the unordered case, we have to check C2 for NaN's, which
7319 doesn't happen to work out to anything nice combination-wise.
7320 So do some bit twiddling on the value we've got in AH to come
7321 up with an appropriate set of condition codes. */
7323 intcmp_mode = CCNOmode;
7324 switch (code)
7326 case GT:
7327 case UNGT:
7328 if (code == GT || !TARGET_IEEE_FP)
7330 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7331 code = EQ;
7333 else
7335 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7336 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7337 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7338 intcmp_mode = CCmode;
7339 code = GEU;
7341 break;
7342 case LT:
7343 case UNLT:
7344 if (code == LT && TARGET_IEEE_FP)
7346 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7347 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
7348 intcmp_mode = CCmode;
7349 code = EQ;
7351 else
7353 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7354 code = NE;
7356 break;
7357 case GE:
7358 case UNGE:
7359 if (code == GE || !TARGET_IEEE_FP)
7361 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
7362 code = EQ;
7364 else
7366 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7367 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7368 GEN_INT (0x01)));
7369 code = NE;
7371 break;
7372 case LE:
7373 case UNLE:
7374 if (code == LE && TARGET_IEEE_FP)
7376 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7377 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7378 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7379 intcmp_mode = CCmode;
7380 code = LTU;
7382 else
7384 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7385 code = NE;
7387 break;
7388 case EQ:
7389 case UNEQ:
7390 if (code == EQ && TARGET_IEEE_FP)
7392 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7393 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7394 intcmp_mode = CCmode;
7395 code = EQ;
7397 else
7399 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7400 code = NE;
7401 break;
7403 break;
7404 case NE:
7405 case LTGT:
7406 if (code == NE && TARGET_IEEE_FP)
7408 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7409 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7410 GEN_INT (0x40)));
7411 code = NE;
7413 else
7415 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7416 code = EQ;
7418 break;
7420 case UNORDERED:
7421 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7422 code = NE;
7423 break;
7424 case ORDERED:
7425 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7426 code = EQ;
7427 break;
7429 default:
7430 abort ();
7434 /* Return the test that should be put into the flags user, i.e.
7435 the bcc, scc, or cmov instruction. */
7436 return gen_rtx_fmt_ee (code, VOIDmode,
7437 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7438 const0_rtx);
7442 ix86_expand_compare (code, second_test, bypass_test)
7443 enum rtx_code code;
7444 rtx *second_test, *bypass_test;
7446 rtx op0, op1, ret;
7447 op0 = ix86_compare_op0;
7448 op1 = ix86_compare_op1;
7450 if (second_test)
7451 *second_test = NULL_RTX;
7452 if (bypass_test)
7453 *bypass_test = NULL_RTX;
7455 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7456 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
7457 second_test, bypass_test);
7458 else
7459 ret = ix86_expand_int_compare (code, op0, op1);
7461 return ret;
7464 /* Return true if CODE will result in a nontrivial jump sequence. */
7465 bool
7466 ix86_fp_jump_nontrivial_p (code)
7467 enum rtx_code code;
7469 enum rtx_code bypass_code, first_code, second_code;
7470 if (!TARGET_CMOVE)
7471 return true;
7472 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7473 return bypass_code != NIL || second_code != NIL;
7476 void
7477 ix86_expand_branch (code, label)
7478 enum rtx_code code;
7479 rtx label;
7481 rtx tmp;
7483 switch (GET_MODE (ix86_compare_op0))
7485 case QImode:
7486 case HImode:
7487 case SImode:
7488 simple:
7489 tmp = ix86_expand_compare (code, NULL, NULL);
7490 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7491 gen_rtx_LABEL_REF (VOIDmode, label),
7492 pc_rtx);
7493 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7494 return;
7496 case SFmode:
7497 case DFmode:
7498 case XFmode:
7499 case TFmode:
7501 rtvec vec;
7502 int use_fcomi;
7503 enum rtx_code bypass_code, first_code, second_code;
7505 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7506 &ix86_compare_op1);
7508 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7510 /* Check whether we will use the natural sequence with one jump. If
7511 so, we can expand the jump early. Otherwise delay expansion by
7512 creating a compound insn so as not to confuse the optimizers. */
7513 if (bypass_code == NIL && second_code == NIL
7514 && TARGET_CMOVE)
7516 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7517 gen_rtx_LABEL_REF (VOIDmode, label),
7518 pc_rtx, NULL_RTX);
7520 else
7522 tmp = gen_rtx_fmt_ee (code, VOIDmode,
7523 ix86_compare_op0, ix86_compare_op1);
7524 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7525 gen_rtx_LABEL_REF (VOIDmode, label),
7526 pc_rtx);
7527 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7529 use_fcomi = ix86_use_fcomi_compare (code);
7530 vec = rtvec_alloc (3 + !use_fcomi);
7531 RTVEC_ELT (vec, 0) = tmp;
7532 RTVEC_ELT (vec, 1)
7533 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7534 RTVEC_ELT (vec, 2)
7535 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7536 if (! use_fcomi)
7537 RTVEC_ELT (vec, 3)
7538 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7540 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7542 return;
7545 case DImode:
7546 if (TARGET_64BIT)
7547 goto simple;
7548 /* Expand DImode branch into multiple compare+branch. */
7550 rtx lo[2], hi[2], label2;
7551 enum rtx_code code1, code2, code3;
7553 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7555 tmp = ix86_compare_op0;
7556 ix86_compare_op0 = ix86_compare_op1;
7557 ix86_compare_op1 = tmp;
7558 code = swap_condition (code);
7560 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7561 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
7563 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7564 avoid two branches. This costs one extra insn, so disable when
7565 optimizing for size. */
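/* E.g., (a != b) becomes t = (hi0 ^ hi1) | (lo0 ^ lo1) followed by
   a branch on t != 0; when one half of a constant operand is zero,
   the corresponding xor is skipped entirely.  */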
7567 if ((code == EQ || code == NE)
7568 && (!optimize_size
7569 || hi[1] == const0_rtx || lo[1] == const0_rtx))
7571 rtx xor0, xor1;
7573 xor1 = hi[0];
7574 if (hi[1] != const0_rtx)
7575 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7576 NULL_RTX, 0, OPTAB_WIDEN);
7578 xor0 = lo[0];
7579 if (lo[1] != const0_rtx)
7580 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7581 NULL_RTX, 0, OPTAB_WIDEN);
7583 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7584 NULL_RTX, 0, OPTAB_WIDEN);
7586 ix86_compare_op0 = tmp;
7587 ix86_compare_op1 = const0_rtx;
7588 ix86_expand_branch (code, label);
7589 return;
7592 /* Otherwise, if we are doing less-than or greater-or-equal-than,
7593 op1 is a constant, and the low word is zero, then we can just
7594 examine the high word. */
7596 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7597 switch (code)
7599 case LT: case LTU: case GE: case GEU:
7600 ix86_compare_op0 = hi[0];
7601 ix86_compare_op1 = hi[1];
7602 ix86_expand_branch (code, label);
7603 return;
7604 default:
7605 break;
7608 /* Otherwise, we need two or three jumps. */
7610 label2 = gen_label_rtx ();
7612 code1 = code;
7613 code2 = swap_condition (code);
7614 code3 = unsigned_condition (code);
7616 switch (code)
7618 case LT: case GT: case LTU: case GTU:
7619 break;
7621 case LE: code1 = LT; code2 = GT; break;
7622 case GE: code1 = GT; code2 = LT; break;
7623 case LEU: code1 = LTU; code2 = GTU; break;
7624 case GEU: code1 = GTU; code2 = LTU; break;
7626 case EQ: code1 = NIL; code2 = NE; break;
7627 case NE: code2 = NIL; break;
7629 default:
7630 abort ();
7634 * a < b =>
7635 * if (hi(a) < hi(b)) goto true;
7636 * if (hi(a) > hi(b)) goto false;
7637 * if (lo(a) < lo(b)) goto true;
7638 * false:
7641 ix86_compare_op0 = hi[0];
7642 ix86_compare_op1 = hi[1];
7644 if (code1 != NIL)
7645 ix86_expand_branch (code1, label);
7646 if (code2 != NIL)
7647 ix86_expand_branch (code2, label2);
7649 ix86_compare_op0 = lo[0];
7650 ix86_compare_op1 = lo[1];
7651 ix86_expand_branch (code3, label);
7653 if (code2 != NIL)
7654 emit_label (label2);
7655 return;
7658 default:
7659 abort ();
7663 /* Split branch based on floating point condition. */
7664 void
7665 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7666 enum rtx_code code;
7667 rtx op1, op2, target1, target2, tmp;
7669 rtx second, bypass;
7670 rtx label = NULL_RTX;
7671 rtx condition;
7672 int bypass_probability = -1, second_probability = -1, probability = -1;
7673 rtx i;
7675 if (target2 != pc_rtx)
7677 rtx tmp = target2;
7678 code = reverse_condition_maybe_unordered (code);
7679 target2 = target1;
7680 target1 = tmp;
7683 condition = ix86_expand_fp_compare (code, op1, op2,
7684 tmp, &second, &bypass);
7686 if (split_branch_probability >= 0)
7688 /* Distribute the probabilities across the jumps.
7689 Assume that BYPASS and SECOND always test
7690 for UNORDERED. */
7691 probability = split_branch_probability;
7693 /* A value of 1 is low enough that the probability does not need
7694 to be updated. Later we may run some experiments and see
7695 whether unordered values are more frequent in practice. */
7696 if (bypass)
7697 bypass_probability = 1;
7698 if (second)
7699 second_probability = 1;
7701 if (bypass != NULL_RTX)
7703 label = gen_label_rtx ();
7704 i = emit_jump_insn (gen_rtx_SET
7705 (VOIDmode, pc_rtx,
7706 gen_rtx_IF_THEN_ELSE (VOIDmode,
7707 bypass,
7708 gen_rtx_LABEL_REF (VOIDmode,
7709 label),
7710 pc_rtx)));
7711 if (bypass_probability >= 0)
7712 REG_NOTES (i)
7713 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7714 GEN_INT (bypass_probability),
7715 REG_NOTES (i));
7717 i = emit_jump_insn (gen_rtx_SET
7718 (VOIDmode, pc_rtx,
7719 gen_rtx_IF_THEN_ELSE (VOIDmode,
7720 condition, target1, target2)));
7721 if (probability >= 0)
7722 REG_NOTES (i)
7723 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7724 GEN_INT (probability),
7725 REG_NOTES (i));
7726 if (second != NULL_RTX)
7728 i = emit_jump_insn (gen_rtx_SET
7729 (VOIDmode, pc_rtx,
7730 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7731 target2)));
7732 if (second_probability >= 0)
7733 REG_NOTES (i)
7734 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7735 GEN_INT (second_probability),
7736 REG_NOTES (i));
7738 if (label != NULL_RTX)
7739 emit_label (label);
7743 ix86_expand_setcc (code, dest)
7744 enum rtx_code code;
7745 rtx dest;
7747 rtx ret, tmp, tmpreg;
7748 rtx second_test, bypass_test;
7750 if (GET_MODE (ix86_compare_op0) == DImode
7751 && !TARGET_64BIT)
7752 return 0; /* FAIL */
7754 if (GET_MODE (dest) != QImode)
7755 abort ();
7757 ret = ix86_expand_compare (code, &second_test, &bypass_test);
7758 PUT_MODE (ret, QImode);
7760 tmp = dest;
7761 tmpreg = dest;
7763 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
7764 if (bypass_test || second_test)
7766 rtx test = second_test;
7767 int bypass = 0;
7768 rtx tmp2 = gen_reg_rtx (QImode);
7769 if (bypass_test)
7771 if (second_test)
7772 abort ();
7773 test = bypass_test;
7774 bypass = 1;
7775 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
7777 PUT_MODE (test, QImode);
7778 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
7780 if (bypass)
7781 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
7782 else
7783 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
7786 return 1; /* DONE */
7790 ix86_expand_int_movcc (operands)
7791 rtx operands[];
7793 enum rtx_code code = GET_CODE (operands[1]), compare_code;
7794 rtx compare_seq, compare_op;
7795 rtx second_test, bypass_test;
7796 enum machine_mode mode = GET_MODE (operands[0]);
7798 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
7799 When the comparison is done with an immediate, we can convert it to LTU or
7800 GEU by altering the integer. */
7802 if ((code == LEU || code == GTU)
7803 && GET_CODE (ix86_compare_op1) == CONST_INT
7804 && mode != HImode
7805 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
7806 && GET_CODE (operands[2]) == CONST_INT
7807 && GET_CODE (operands[3]) == CONST_INT)
7809 if (code == LEU)
7810 code = LTU;
7811 else
7812 code = GEU;
7813 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
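/* E.g., the unsigned test "x <= 5" (LEU) becomes "x < 6" (LTU), and
   "x > 5" (GTU) becomes "x >= 6" (GEU) -- forms that the sbbl-based
   sequences below can use directly.  */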
7816 start_sequence ();
7817 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
7818 compare_seq = gen_sequence ();
7819 end_sequence ();
7821 compare_code = GET_CODE (compare_op);
7823 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
7824 HImode insns, we'd be swallowed in word prefix ops. */
7826 if (mode != HImode
7827 && (mode != DImode || TARGET_64BIT)
7828 && GET_CODE (operands[2]) == CONST_INT
7829 && GET_CODE (operands[3]) == CONST_INT)
7831 rtx out = operands[0];
7832 HOST_WIDE_INT ct = INTVAL (operands[2]);
7833 HOST_WIDE_INT cf = INTVAL (operands[3]);
7834 HOST_WIDE_INT diff;
7836 if ((compare_code == LTU || compare_code == GEU)
7837 && !second_test && !bypass_test)
7840 /* Detect overlap between destination and compare sources. */
7841 rtx tmp = out;
7843 /* To simplify the rest of the code, restrict to the GEU case. */
7844 if (compare_code == LTU)
7846 int tmp = ct;
7847 ct = cf;
7848 cf = tmp;
7849 compare_code = reverse_condition (compare_code);
7850 code = reverse_condition (code);
7852 diff = ct - cf;
7854 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
7855 || reg_overlap_mentioned_p (out, ix86_compare_op1))
7856 tmp = gen_reg_rtx (mode);
7858 emit_insn (compare_seq);
7859 if (mode == DImode)
7860 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
7861 else
7862 emit_insn (gen_x86_movsicc_0_m1 (tmp));
7864 if (diff == 1)
7867 * cmpl op0,op1
7868 * sbbl dest,dest
7869 * [addl dest, ct]
7871 * Size 5 - 8.
7873 if (ct)
7874 tmp = expand_simple_binop (mode, PLUS,
7875 tmp, GEN_INT (ct),
7876 tmp, 1, OPTAB_DIRECT);
7878 else if (cf == -1)
7881 * cmpl op0,op1
7882 * sbbl dest,dest
7883 * orl $ct, dest
7885 * Size 8.
7887 tmp = expand_simple_binop (mode, IOR,
7888 tmp, GEN_INT (ct),
7889 tmp, 1, OPTAB_DIRECT);
7891 else if (diff == -1 && ct)
7894 * cmpl op0,op1
7895 * sbbl dest,dest
7896 * xorl $-1, dest
7897 * [addl dest, cf]
7899 * Size 8 - 11.
7901 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
7902 if (cf)
7903 tmp = expand_simple_binop (mode, PLUS,
7904 tmp, GEN_INT (cf),
7905 tmp, 1, OPTAB_DIRECT);
7907 else
7910 * cmpl op0,op1
7911 * sbbl dest,dest
7912 * andl cf - ct, dest
7913 * [addl dest, ct]
7915 * Size 8 - 11.
7917 tmp = expand_simple_binop (mode, AND,
7918 tmp,
7919 GEN_INT (trunc_int_for_mode
7920 (cf - ct, mode)),
7921 tmp, 1, OPTAB_DIRECT);
7922 if (ct)
7923 tmp = expand_simple_binop (mode, PLUS,
7924 tmp, GEN_INT (ct),
7925 tmp, 1, OPTAB_DIRECT);
7928 if (tmp != out)
7929 emit_move_insn (out, tmp);
7931 return 1; /* DONE */
7934 diff = ct - cf;
7935 if (diff < 0)
7937 HOST_WIDE_INT tmp;
7938 tmp = ct, ct = cf, cf = tmp;
7939 diff = -diff;
7940 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
7942 /* We may be reversing an unordered compare to a normal compare, which
7943 is not valid in general (we may convert a non-trapping condition
7944 into a trapping one); however, on i386 we currently emit all
7945 comparisons unordered. */
7946 compare_code = reverse_condition_maybe_unordered (compare_code);
7947 code = reverse_condition_maybe_unordered (code);
7949 else
7951 compare_code = reverse_condition (compare_code);
7952 code = reverse_condition (code);
7955 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
7956 || diff == 3 || diff == 5 || diff == 9)
7957 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
7960 * xorl dest,dest
7961 * cmpl op1,op2
7962 * setcc dest
7963 * lea cf(dest*(ct-cf)),dest
7965 * Size 14.
7967 * This also catches the degenerate setcc-only case.
7970 rtx tmp;
7971 int nops;
7973 out = emit_store_flag (out, code, ix86_compare_op0,
7974 ix86_compare_op1, VOIDmode, 0, 1);
7976 nops = 0;
7977 /* On x86_64 the lea instruction operates on Pmode, so we need the arithmetic
7978 done in the proper mode to match. */
7979 if (diff == 1)
7980 tmp = out;
7981 else
7983 rtx out1;
7984 out1 = out;
7985 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
7986 nops++;
7987 if (diff & 1)
7989 tmp = gen_rtx_PLUS (mode, tmp, out1);
7990 nops++;
7993 if (cf != 0)
7995 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
7996 nops++;
7998 if (tmp != out
7999 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8001 if (nops == 1)
8003 rtx clob;
8005 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8006 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8008 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8009 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8010 emit_insn (tmp);
8012 else
8013 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8015 if (out != operands[0])
8016 emit_move_insn (operands[0], out);
8018 return 1; /* DONE */
8022 * General case: Jumpful:
8023 * xorl dest,dest cmpl op1, op2
8024 * cmpl op1, op2 movl ct, dest
8025 * setcc dest jcc 1f
8026 * decl dest movl cf, dest
8027 * andl (cf-ct),dest 1:
8028 * addl ct,dest
8030 * Size 20. Size 14.
8032 * This is reasonably steep, but branch mispredict costs are
8033 * high on modern cpus, so consider failing only if optimizing
8034 * for space.
8036 * %%% Parameterize branch_cost on the tuning architecture, then
8037 * use that. The 80386 couldn't care less about mispredicts.
8040 if (!optimize_size && !TARGET_CMOVE)
8042 if (ct == 0)
8044 ct = cf;
8045 cf = 0;
8046 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8048 /* We may be reversing an unordered compare to a normal compare,
8049 which is not valid in general (we may convert a non-trapping
8050 condition into a trapping one); however, on i386 we currently
8051 emit all comparisons unordered. */
8052 compare_code = reverse_condition_maybe_unordered (compare_code);
8053 code = reverse_condition_maybe_unordered (code);
8055 else
8057 compare_code = reverse_condition (compare_code);
8058 code = reverse_condition (code);
8062 out = emit_store_flag (out, code, ix86_compare_op0,
8063 ix86_compare_op1, VOIDmode, 0, 1);
8065 out = expand_simple_binop (mode, PLUS,
8066 out, constm1_rtx,
8067 out, 1, OPTAB_DIRECT);
8068 out = expand_simple_binop (mode, AND,
8069 out,
8070 GEN_INT (trunc_int_for_mode
8071 (cf - ct, mode)),
8072 out, 1, OPTAB_DIRECT);
8073 out = expand_simple_binop (mode, PLUS,
8074 out, GEN_INT (ct),
8075 out, 1, OPTAB_DIRECT);
8076 if (out != operands[0])
8077 emit_move_insn (operands[0], out);
8079 return 1; /* DONE */
8083 if (!TARGET_CMOVE)
8085 /* Try a few things more with specific constants and a variable. */
8087 optab op;
8088 rtx var, orig_out, out, tmp;
8090 if (optimize_size)
8091 return 0; /* FAIL */
8093 /* If one of the two operands is an interesting constant, load a
8094 constant via the code above and mask the variable in with a logical operation. */
8096 if (GET_CODE (operands[2]) == CONST_INT)
8098 var = operands[3];
8099 if (INTVAL (operands[2]) == 0)
8100 operands[3] = constm1_rtx, op = and_optab;
8101 else if (INTVAL (operands[2]) == -1)
8102 operands[3] = const0_rtx, op = ior_optab;
8103 else
8104 return 0; /* FAIL */
8106 else if (GET_CODE (operands[3]) == CONST_INT)
8108 var = operands[2];
8109 if (INTVAL (operands[3]) == 0)
8110 operands[2] = constm1_rtx, op = and_optab;
8111 else if (INTVAL (operands[3]) == -1)
8112 operands[2] = const0_rtx, op = ior_optab;
8113 else
8114 return 0; /* FAIL */
8116 else
8117 return 0; /* FAIL */
8119 orig_out = operands[0];
8120 tmp = gen_reg_rtx (mode);
8121 operands[0] = tmp;
8123 /* Recurse to get the constant loaded. */
8124 if (ix86_expand_int_movcc (operands) == 0)
8125 return 0; /* FAIL */
8127 /* Mask in the interesting variable. */
8128 out = expand_binop (mode, op, var, tmp, orig_out, 0,
8129 OPTAB_WIDEN);
8130 if (out != orig_out)
8131 emit_move_insn (orig_out, out);
8133 return 1; /* DONE */
8137 * For comparison with above,
8139 * movl cf,dest
8140 * movl ct,tmp
8141 * cmpl op1,op2
8142 * cmovcc tmp,dest
8144 * Size 15.
8147 if (! nonimmediate_operand (operands[2], mode))
8148 operands[2] = force_reg (mode, operands[2]);
8149 if (! nonimmediate_operand (operands[3], mode))
8150 operands[3] = force_reg (mode, operands[3]);
8152 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8154 rtx tmp = gen_reg_rtx (mode);
8155 emit_move_insn (tmp, operands[3]);
8156 operands[3] = tmp;
8158 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8160 rtx tmp = gen_reg_rtx (mode);
8161 emit_move_insn (tmp, operands[2]);
8162 operands[2] = tmp;
8164 if (! register_operand (operands[2], VOIDmode)
8165 && ! register_operand (operands[3], VOIDmode))
8166 operands[2] = force_reg (mode, operands[2]);
8168 emit_insn (compare_seq);
8169 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8170 gen_rtx_IF_THEN_ELSE (mode,
8171 compare_op, operands[2],
8172 operands[3])));
8173 if (bypass_test)
8174 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8175 gen_rtx_IF_THEN_ELSE (mode,
8176 bypass_test,
8177 operands[3],
8178 operands[0])));
8179 if (second_test)
8180 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8181 gen_rtx_IF_THEN_ELSE (mode,
8182 second_test,
8183 operands[2],
8184 operands[0])));
8186 return 1; /* DONE */
8189 int
8190 ix86_expand_fp_movcc (operands)
8191 rtx operands[];
8193 enum rtx_code code;
8194 rtx tmp;
8195 rtx compare_op, second_test, bypass_test;
8197 /* For SF/DFmode conditional moves based on comparisons
8198 in the same mode, we may want to use SSE min/max instructions. */
8199 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
8200 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
8201 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
8202 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
8203 && (!TARGET_IEEE_FP
8204 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
8205 /* We may be called from the post-reload splitter. */
8206 && (!REG_P (operands[0])
8207 || SSE_REG_P (operands[0])
8208 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
8210 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
8211 code = GET_CODE (operands[1]);
8213 /* See if we have a (cross) match between the comparison operands and
8214 the conditional move operands. */
8215 if (rtx_equal_p (operands[2], op1))
8217 rtx tmp = op0;
8218 op0 = op1;
8219 op1 = tmp;
8220 code = reverse_condition_maybe_unordered (code);
8222 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
8224 /* Check for min operation. */
8225 if (code == LT)
8227 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8228 if (memory_operand (op0, VOIDmode))
8229 op0 = force_reg (GET_MODE (operands[0]), op0);
8230 if (GET_MODE (operands[0]) == SFmode)
8231 emit_insn (gen_minsf3 (operands[0], op0, op1));
8232 else
8233 emit_insn (gen_mindf3 (operands[0], op0, op1));
8234 return 1;
8236 /* Check for max operation. */
8237 if (code == GT)
8239 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8240 if (memory_operand (op0, VOIDmode))
8241 op0 = force_reg (GET_MODE (operands[0]), op0);
8242 if (GET_MODE (operands[0]) == SFmode)
8243 emit_insn (gen_maxsf3 (operands[0], op0, op1));
8244 else
8245 emit_insn (gen_maxdf3 (operands[0], op0, op1));
8246 return 1;
8249 /* Arrange for the condition to be accepted by sse_comparison_operator.
8250 When not in IEEE mode, also try to canonicalize the destination
8251 operand to be first in the comparison - this helps reload to avoid
8252 extra moves. */
8253 if (!sse_comparison_operator (operands[1], VOIDmode)
8254 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
8256 rtx tmp = ix86_compare_op0;
8257 ix86_compare_op0 = ix86_compare_op1;
8258 ix86_compare_op1 = tmp;
8259 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
8260 VOIDmode, ix86_compare_op0,
8261 ix86_compare_op1);
8263 /* Similarly, try to arrange for the result to be the first operand of
8264 the conditional move. We also don't support the NE comparison on
8265 SSE, so try to avoid it. */
8266 if ((rtx_equal_p (operands[0], operands[3])
8267 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
8268 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
8270 rtx tmp = operands[2];
8271 operands[2] = operands[3];
8272 operands[3] = tmp;
8273 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8274 (GET_CODE (operands[1])),
8275 VOIDmode, ix86_compare_op0,
8276 ix86_compare_op1);
8278 if (GET_MODE (operands[0]) == SFmode)
8279 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
8280 operands[2], operands[3],
8281 ix86_compare_op0, ix86_compare_op1));
8282 else
8283 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
8284 operands[2], operands[3],
8285 ix86_compare_op0, ix86_compare_op1));
8286 return 1;
8292 code = GET_CODE (operands[1]);
8293 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8295 /* The floating point conditional move instructions don't directly
8296 support signed integer comparisons. */
8298 if (!fcmov_comparison_operator (compare_op, VOIDmode))
8300 if (second_test != NULL || bypass_test != NULL)
8301 abort ();
8302 tmp = gen_reg_rtx (QImode);
8303 ix86_expand_setcc (code, tmp);
8304 code = NE;
8305 ix86_compare_op0 = tmp;
8306 ix86_compare_op1 = const0_rtx;
8307 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8309 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8311 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8312 emit_move_insn (tmp, operands[3]);
8313 operands[3] = tmp;
8315 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8317 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8318 emit_move_insn (tmp, operands[2]);
8319 operands[2] = tmp;
8322 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8323 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8324 compare_op,
8325 operands[2],
8326 operands[3])));
8327 if (bypass_test)
8328 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8329 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8330 bypass_test,
8331 operands[3],
8332 operands[0])));
8333 if (second_test)
8334 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8335 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8336 second_test,
8337 operands[2],
8338 operands[0])));
8340 return 1;
8343 /* Split OPERAND into word-sized parts. Similar to split_di, but
8344 works for floating point values and non-offsettable memories.
8345 For pushes, it returns just stack offsets; the values will be saved
8346 in the right order. At most three parts are generated. */
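/* For example, on a 32-bit target a DFmode value yields two SImode
   parts and an XFmode value three, while on a 64-bit target an XFmode
   or TFmode value yields a DImode part plus an SImode part.  */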
8348 static int
8349 ix86_split_to_parts (operand, parts, mode)
8350 rtx operand;
8351 rtx *parts;
8352 enum machine_mode mode;
8354 int size;
8356 if (!TARGET_64BIT)
8357 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8358 else
8359 size = (GET_MODE_SIZE (mode) + 4) / 8;
8361 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8362 abort ();
8363 if (size < 2 || size > 3)
8364 abort ();
8366 /* Optimize constant pool references to immediates. This is used by fp moves,
8367 which force all constants to memory to allow combining. */
8369 if (GET_CODE (operand) == MEM
8370 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8371 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8372 operand = get_pool_constant (XEXP (operand, 0));
8374 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
8377 /* The only non-offsettable memories we handle are pushes. */
8377 if (! push_operand (operand, VOIDmode))
8378 abort ();
8380 operand = copy_rtx (operand);
8381 PUT_MODE (operand, Pmode);
8382 parts[0] = parts[1] = parts[2] = operand;
8384 else if (!TARGET_64BIT)
8386 if (mode == DImode)
8387 split_di (&operand, 1, &parts[0], &parts[1]);
8388 else
8390 if (REG_P (operand))
8392 if (!reload_completed)
8393 abort ();
8394 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8395 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8396 if (size == 3)
8397 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8399 else if (offsettable_memref_p (operand))
8401 operand = adjust_address (operand, SImode, 0);
8402 parts[0] = operand;
8403 parts[1] = adjust_address (operand, SImode, 4);
8404 if (size == 3)
8405 parts[2] = adjust_address (operand, SImode, 8);
8407 else if (GET_CODE (operand) == CONST_DOUBLE)
8409 REAL_VALUE_TYPE r;
8410 long l[4];
8412 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8413 switch (mode)
8415 case XFmode:
8416 case TFmode:
8417 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8418 parts[2] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8419 break;
8420 case DFmode:
8421 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8422 break;
8423 default:
8424 abort ();
8426 parts[1] = GEN_INT (trunc_int_for_mode (l[1], SImode));
8427 parts[0] = GEN_INT (trunc_int_for_mode (l[0], SImode));
8429 else
8430 abort ();
8433 else
8435 if (mode == TImode)
8436 split_ti (&operand, 1, &parts[0], &parts[1]);
8437 if (mode == XFmode || mode == TFmode)
8439 if (REG_P (operand))
8441 if (!reload_completed)
8442 abort ();
8443 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8444 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8446 else if (offsettable_memref_p (operand))
8448 operand = adjust_address (operand, DImode, 0);
8449 parts[0] = operand;
8450 parts[1] = adjust_address (operand, SImode, 8);
8452 else if (GET_CODE (operand) == CONST_DOUBLE)
8454 REAL_VALUE_TYPE r;
8455 long l[3];
8457 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8458 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8459 /* Do not use a shift by 32, to avoid a warning on 32-bit systems. */
8460 if (HOST_BITS_PER_WIDE_INT >= 64)
8461 parts[0]
8462 = GEN_INT (trunc_int_for_mode
8463 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
8464 + ((((HOST_WIDE_INT)l[1]) << 31) << 1),
8465 DImode));
8466 else
8467 parts[0] = immed_double_const (l[0], l[1], DImode);
8468 parts[1] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8470 else
8471 abort ();
8475 return size;
8478 /* Emit insns to perform a move or push of DI, DF, and XF values.
8479 All the required insns are emitted here. Operands 2-4 are filled
8480 with the destination parts in the correct order; operands 5-7 with
8481 the corresponding source parts. */
8483 void
8484 ix86_split_long_move (operands)
8485 rtx operands[];
8487 rtx part[2][3];
8488 int nparts;
8489 int push = 0;
8490 int collisions = 0;
8491 enum machine_mode mode = GET_MODE (operands[0]);
8493 /* The DFmode expanders may ask us to move a double.
8494 For a 64-bit target this is a single move. By hiding that fact
8495 here we simplify the i386.md splitters. */
8496 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8498 /* Optimize constant pool references to immediates. This is used by
8499 fp moves, which force all constants to memory to allow combining. */
8501 if (GET_CODE (operands[1]) == MEM
8502 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8503 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8504 operands[1] = get_pool_constant (XEXP (operands[1], 0));
8505 if (push_operand (operands[0], VOIDmode))
8507 operands[0] = copy_rtx (operands[0]);
8508 PUT_MODE (operands[0], Pmode);
8510 else
8511 operands[0] = gen_lowpart (DImode, operands[0]);
8512 operands[1] = gen_lowpart (DImode, operands[1]);
8513 emit_move_insn (operands[0], operands[1]);
8514 return;
8517 /* The only non-offsettable memory we handle is push. */
8518 if (push_operand (operands[0], VOIDmode))
8519 push = 1;
8520 else if (GET_CODE (operands[0]) == MEM
8521 && ! offsettable_memref_p (operands[0]))
8522 abort ();
8524 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8525 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
8527 /* When emitting a push, take care of source operands living on the stack. */
8528 if (push && GET_CODE (operands[1]) == MEM
8529 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
8531 if (nparts == 3)
8532 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8533 XEXP (part[1][2], 0));
8534 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8535 XEXP (part[1][1], 0));
8538 /* We need to do the copy in the right order in case an address register
8539 of the source overlaps the destination. */
8540 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
8542 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
8543 collisions++;
8544 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8545 collisions++;
8546 if (nparts == 3
8547 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
8548 collisions++;
8550 /* Collision in the middle part can be handled by reordering. */
8551 if (collisions == 1 && nparts == 3
8552 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8554 rtx tmp;
8555 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
8556 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
8559 /* If there are more collisions, we can't handle them by reordering.
8560 Do an lea to the last part and use only one colliding move. */
8561 else if (collisions > 1)
8563 collisions = 1;
8564 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
8565 XEXP (part[1][0], 0)));
8566 part[1][0] = change_address (part[1][0],
8567 TARGET_64BIT ? DImode : SImode,
8568 part[0][nparts - 1]);
8569 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
8570 if (nparts == 3)
8571 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
8575 if (push)
8577 if (!TARGET_64BIT)
8579 if (nparts == 3)
8581 /* We use only the first 12 bytes of the TFmode value, but for pushing
8582 we are required to adjust the stack as if we were pushing a real
8583 16-byte value. */
8584 if (mode == TFmode && !TARGET_64BIT)
8585 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
8586 GEN_INT (-4)));
8587 emit_move_insn (part[0][2], part[1][2]);
8590 else
8592 /* In 64-bit mode we don't have a 32-bit push available. If the operand
8593 is a register, that is OK - we just use the larger counterpart. We also
8594 retype memory operands - these come from an attempt to avoid the REX
8595 prefix on moving the second half of a TFmode value. */
8596 if (GET_MODE (part[1][1]) == SImode)
8598 if (GET_CODE (part[1][1]) == MEM)
8599 part[1][1] = adjust_address (part[1][1], DImode, 0);
8600 else if (REG_P (part[1][1]))
8601 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
8602 else
8603 abort ();
8604 if (GET_MODE (part[1][0]) == SImode)
8605 part[1][0] = part[1][1];
8608 emit_move_insn (part[0][1], part[1][1]);
8609 emit_move_insn (part[0][0], part[1][0]);
8610 return;
8613 /* Choose the correct order so as not to overwrite the source before it is copied. */
8614 if ((REG_P (part[0][0])
8615 && REG_P (part[1][1])
8616 && (REGNO (part[0][0]) == REGNO (part[1][1])
8617 || (nparts == 3
8618 && REGNO (part[0][0]) == REGNO (part[1][2]))))
8619 || (collisions > 0
8620 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
8622 if (nparts == 3)
8624 operands[2] = part[0][2];
8625 operands[3] = part[0][1];
8626 operands[4] = part[0][0];
8627 operands[5] = part[1][2];
8628 operands[6] = part[1][1];
8629 operands[7] = part[1][0];
8631 else
8633 operands[2] = part[0][1];
8634 operands[3] = part[0][0];
8635 operands[5] = part[1][1];
8636 operands[6] = part[1][0];
8639 else
8641 if (nparts == 3)
8643 operands[2] = part[0][0];
8644 operands[3] = part[0][1];
8645 operands[4] = part[0][2];
8646 operands[5] = part[1][0];
8647 operands[6] = part[1][1];
8648 operands[7] = part[1][2];
8650 else
8652 operands[2] = part[0][0];
8653 operands[3] = part[0][1];
8654 operands[5] = part[1][0];
8655 operands[6] = part[1][1];
8658 emit_move_insn (operands[2], operands[5]);
8659 emit_move_insn (operands[3], operands[6]);
8660 if (nparts == 3)
8661 emit_move_insn (operands[4], operands[7]);
8663 return;
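/* Split a DImode shift left into SImode operations, since 32-bit
   targets have no 64-bit shift instructions.  For example
   (illustrative count), a constant shift by 40 becomes: move the low
   word into the high word, clear the low word, and shift the high
   word left by 40 - 32 = 8.  */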
8666 void
8667 ix86_split_ashldi (operands, scratch)
8668 rtx *operands, scratch;
8670 rtx low[2], high[2];
8671 int count;
8673 if (GET_CODE (operands[2]) == CONST_INT)
8675 split_di (operands, 2, low, high);
8676 count = INTVAL (operands[2]) & 63;
8678 if (count >= 32)
8680 emit_move_insn (high[0], low[1]);
8681 emit_move_insn (low[0], const0_rtx);
8683 if (count > 32)
8684 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
8686 else
8688 if (!rtx_equal_p (operands[0], operands[1]))
8689 emit_move_insn (operands[0], operands[1]);
8690 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
8691 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
8694 else
8696 if (!rtx_equal_p (operands[0], operands[1]))
8697 emit_move_insn (operands[0], operands[1]);
8699 split_di (operands, 1, low, high);
8701 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
8702 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
8704 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8706 if (! no_new_pseudos)
8707 scratch = force_reg (SImode, const0_rtx);
8708 else
8709 emit_move_insn (scratch, const0_rtx);
8711 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
8712 scratch));
8714 else
8715 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
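/* Split a DImode arithmetic shift right into SImode operations.  For
   example (illustrative count), a constant shift by 40 becomes: move
   the high word into the low word, replicate the sign into the high
   word by shifting it right by 31, and shift the low word right by
   40 - 32 = 8.  */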
8719 void
8720 ix86_split_ashrdi (operands, scratch)
8721 rtx *operands, scratch;
8723 rtx low[2], high[2];
8724 int count;
8726 if (GET_CODE (operands[2]) == CONST_INT)
8728 split_di (operands, 2, low, high);
8729 count = INTVAL (operands[2]) & 63;
8731 if (count >= 32)
8733 emit_move_insn (low[0], high[1]);
8735 if (! reload_completed)
8736 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
8737 else
8739 emit_move_insn (high[0], low[0]);
8740 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
8743 if (count > 32)
8744 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
8746 else
8748 if (!rtx_equal_p (operands[0], operands[1]))
8749 emit_move_insn (operands[0], operands[1]);
8750 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8751 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
8754 else
8756 if (!rtx_equal_p (operands[0], operands[1]))
8757 emit_move_insn (operands[0], operands[1]);
8759 split_di (operands, 1, low, high);
8761 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8762 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
8764 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8766 if (! no_new_pseudos)
8767 scratch = gen_reg_rtx (SImode);
8768 emit_move_insn (scratch, high[0]);
8769 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
8770 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8771 scratch));
8773 else
8774 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
8778 void
8779 ix86_split_lshrdi (operands, scratch)
8780 rtx *operands, scratch;
8782 rtx low[2], high[2];
8783 int count;
8785 if (GET_CODE (operands[2]) == CONST_INT)
8787 split_di (operands, 2, low, high);
8788 count = INTVAL (operands[2]) & 63;
8790 if (count >= 32)
8792 emit_move_insn (low[0], high[1]);
8793 emit_move_insn (high[0], const0_rtx);
8795 if (count > 32)
8796 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
8798 else
8800 if (!rtx_equal_p (operands[0], operands[1]))
8801 emit_move_insn (operands[0], operands[1]);
8802 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8803 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
8806 else
8808 if (!rtx_equal_p (operands[0], operands[1]))
8809 emit_move_insn (operands[0], operands[1]);
8811 split_di (operands, 1, low, high);
8813 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8814 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
8816 /* Heh. By reversing the arguments, we can reuse this pattern. */
8817 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8819 if (! no_new_pseudos)
8820 scratch = force_reg (SImode, const0_rtx);
8821 else
8822 emit_move_insn (scratch, const0_rtx);
8824 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8825 scratch));
8827 else
8828 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
8832 /* Helper function for the string operations below. Test whether (VARIABLE
8833 & VALUE) is zero; if so, jump to the returned label, past the fix-up code the caller emits. */
8834 static rtx
8835 ix86_expand_aligntest (variable, value)
8836 rtx variable;
8837 int value;
8839 rtx label = gen_label_rtx ();
8840 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
8841 if (GET_MODE (variable) == DImode)
8842 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
8843 else
8844 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
8845 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
8846 1, label);
8847 return label;
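/* A typical use (illustrative): callers below do
   label = ix86_expand_aligntest (destreg, 1), emit a one-byte move and
   a counter adjustment, and then emit the label, so the fix-up code
   runs only when the destination address is odd.  */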
8850 /* Decrement COUNTREG by VALUE. */
8851 static void
8852 ix86_adjust_counter (countreg, value)
8853 rtx countreg;
8854 HOST_WIDE_INT value;
8856 if (GET_MODE (countreg) == DImode)
8857 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
8858 else
8859 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
8862 /* Zero-extend the possibly-SImode EXP into a Pmode register. */
8863 rtx
8864 ix86_zero_extend_to_Pmode (exp)
8865 rtx exp;
8867 rtx r;
8868 if (GET_MODE (exp) == VOIDmode)
8869 return force_reg (Pmode, exp);
8870 if (GET_MODE (exp) == Pmode)
8871 return copy_to_mode_reg (Pmode, exp);
8872 r = gen_reg_rtx (Pmode);
8873 emit_insn (gen_zero_extendsidi2 (r, exp));
8874 return r;
8877 /* Expand a string move (memcpy) operation. Use i386 string operations
8878 when profitable. expand_clrstr contains similar code. */
8879 int
8880 ix86_expand_movstr (dst, src, count_exp, align_exp)
8881 rtx dst, src, count_exp, align_exp;
8883 rtx srcreg, destreg, countreg;
8884 enum machine_mode counter_mode;
8885 HOST_WIDE_INT align = 0;
8886 unsigned HOST_WIDE_INT count = 0;
8887 rtx insns;
8889 start_sequence ();
8891 if (GET_CODE (align_exp) == CONST_INT)
8892 align = INTVAL (align_exp);
8894 /* This simple hack avoids all inlining code and simplifies code below. */
8895 if (!TARGET_ALIGN_STRINGOPS)
8896 align = 64;
8898 if (GET_CODE (count_exp) == CONST_INT)
8899 count = INTVAL (count_exp);
8901 /* Figure out the proper mode for the counter. For 32-bit targets it is
8902 always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
8903 COUNT was set above to the number of bytes copied when known at compile time. */
8904 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
8905 || x86_64_zero_extended_value (count_exp))
8906 counter_mode = SImode;
8907 else
8908 counter_mode = DImode;
8910 if (counter_mode != SImode && counter_mode != DImode)
8911 abort ();
8913 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
8914 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
8916 emit_insn (gen_cld ());
8918 /* When optimizing for size, emit a simple rep ; movsb instruction for
8919 counts not divisible by 4. */
8921 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
8923 countreg = ix86_zero_extend_to_Pmode (count_exp);
8924 if (TARGET_64BIT)
8925 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
8926 destreg, srcreg, countreg));
8927 else
8928 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
8929 destreg, srcreg, countreg));
8932 /* For constant aligned (or small unaligned) copies use rep movsl
8933 followed by code copying the rest. For PentiumPro ensure 8 byte
8934 alignment to allow rep movsl acceleration. */
8936 else if (count != 0
8937 && (align >= 8
8938 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
8939 || optimize_size || count < (unsigned int)64))
8941 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
8942 if (count & ~(size - 1))
8944 countreg = copy_to_mode_reg (counter_mode,
8945 GEN_INT ((count >> (size == 4 ? 2 : 3))
8946 & (TARGET_64BIT ? -1 : 0x3fffffff)));
8947 countreg = ix86_zero_extend_to_Pmode (countreg);
8948 if (size == 4)
8950 if (TARGET_64BIT)
8951 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
8952 destreg, srcreg, countreg));
8953 else
8954 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
8955 destreg, srcreg, countreg));
8957 else
8958 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
8959 destreg, srcreg, countreg));
8961 if (size == 8 && (count & 0x04))
8962 emit_insn (gen_strmovsi (destreg, srcreg));
8963 if (count & 0x02)
8964 emit_insn (gen_strmovhi (destreg, srcreg));
8965 if (count & 0x01)
8966 emit_insn (gen_strmovqi (destreg, srcreg));
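/* For example (illustrative count), a 23-byte copy with size == 4
   becomes a rep movsl of 5 words (20 bytes) followed by one movsw and
   one movsb for the remaining 3 bytes.  */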
8968 /* The generic code based on the glibc implementation:
8969 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
8970 allowing accelerated copying there)
8971 - copy the data using rep movsl
8972 - copy the rest. */
8973 else
8975 rtx countreg2;
8976 rtx label = NULL;
8978 /* In case we don't know anything about the alignment, default to the
8979 library version, since it is usually equally fast and results in
8980 shorter code. */
8981 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
8983 end_sequence ();
8984 return 0;
8987 if (TARGET_SINGLE_STRINGOP)
8988 emit_insn (gen_cld ());
8990 countreg2 = gen_reg_rtx (Pmode);
8991 countreg = copy_to_mode_reg (counter_mode, count_exp);
8993 /* We don't use loops to align the destination or to copy parts smaller
8994 than 4 bytes, because gcc is able to optimize such code better (when
8995 the destination or the count really is aligned, gcc is often
8996 able to predict the branches) and also it is friendlier to the
8997 hardware branch prediction.
8999 Using loops is beneficial for the generic case, because we can
9000 handle small counts using the loops. Many CPUs (such as Athlon)
9001 have large REP prefix setup costs.
9003 This is quite costly. Maybe we can revisit this decision later or
9004 add some customizability to this code. */
9006 if (count == 0
9007 && align < (TARGET_PENTIUMPRO && (count == 0
9008 || count >= (unsigned int)260)
9009 ? 8 : UNITS_PER_WORD))
9011 label = gen_label_rtx ();
9012 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9013 LEU, 0, counter_mode, 1, label);
9015 if (align <= 1)
9017 rtx label = ix86_expand_aligntest (destreg, 1);
9018 emit_insn (gen_strmovqi (destreg, srcreg));
9019 ix86_adjust_counter (countreg, 1);
9020 emit_label (label);
9021 LABEL_NUSES (label) = 1;
9023 if (align <= 2)
9025 rtx label = ix86_expand_aligntest (destreg, 2);
9026 emit_insn (gen_strmovhi (destreg, srcreg));
9027 ix86_adjust_counter (countreg, 2);
9028 emit_label (label);
9029 LABEL_NUSES (label) = 1;
9031 if (align <= 4
9032 && ((TARGET_PENTIUMPRO && (count == 0
9033 || count >= (unsigned int)260))
9034 || TARGET_64BIT))
9036 rtx label = ix86_expand_aligntest (destreg, 4);
9037 emit_insn (gen_strmovsi (destreg, srcreg));
9038 ix86_adjust_counter (countreg, 4);
9039 emit_label (label);
9040 LABEL_NUSES (label) = 1;
9043 if (!TARGET_SINGLE_STRINGOP)
9044 emit_insn (gen_cld ());
9045 if (TARGET_64BIT)
9047 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9048 GEN_INT (3)));
9049 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9050 destreg, srcreg, countreg2));
9052 else
9054 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9055 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9056 destreg, srcreg, countreg2));
9059 if (label)
9061 emit_label (label);
9062 LABEL_NUSES (label) = 1;
9064 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9065 emit_insn (gen_strmovsi (destreg, srcreg));
9066 if ((align <= 4 || count == 0) && TARGET_64BIT)
9068 rtx label = ix86_expand_aligntest (countreg, 4);
9069 emit_insn (gen_strmovsi (destreg, srcreg));
9070 emit_label (label);
9071 LABEL_NUSES (label) = 1;
9073 if (align > 2 && count != 0 && (count & 2))
9074 emit_insn (gen_strmovhi (destreg, srcreg));
9075 if (align <= 2 || count == 0)
9077 rtx label = ix86_expand_aligntest (countreg, 2);
9078 emit_insn (gen_strmovhi (destreg, srcreg));
9079 emit_label (label);
9080 LABEL_NUSES (label) = 1;
9082 if (align > 1 && count != 0 && (count & 1))
9083 emit_insn (gen_strmovqi (destreg, srcreg));
9084 if (align <= 1 || count == 0)
9086 rtx label = ix86_expand_aligntest (countreg, 1);
9087 emit_insn (gen_strmovqi (destreg, srcreg));
9088 emit_label (label);
9089 LABEL_NUSES (label) = 1;
9093 insns = get_insns ();
9094 end_sequence ();
9096 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9097 emit_insns (insns);
9098 return 1;
9101 /* Expand a string clear operation (bzero). Use i386 string operations
9102 when profitable. expand_movstr contains similar code. */
9103 int
9104 ix86_expand_clrstr (src, count_exp, align_exp)
9105 rtx src, count_exp, align_exp;
9107 rtx destreg, zeroreg, countreg;
9108 enum machine_mode counter_mode;
9109 HOST_WIDE_INT align = 0;
9110 unsigned HOST_WIDE_INT count = 0;
9112 if (GET_CODE (align_exp) == CONST_INT)
9113 align = INTVAL (align_exp);
9115 /* This simple hack avoids all inlining code and simplifies code below. */
9116 if (!TARGET_ALIGN_STRINGOPS)
9117 align = 32;
9119 if (GET_CODE (count_exp) == CONST_INT)
9120 count = INTVAL (count_exp);
9121 /* Figure out the proper mode for the counter. For 32-bit targets it is
9122 always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
9123 COUNT was set above to the number of bytes cleared when known at compile time. */
9124 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9125 || x86_64_zero_extended_value (count_exp))
9126 counter_mode = SImode;
9127 else
9128 counter_mode = DImode;
9130 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9132 emit_insn (gen_cld ());
9134 /* When optimizing for size, emit a simple rep ; stosb instruction for
9135 counts not divisible by 4. */
9137 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9139 countreg = ix86_zero_extend_to_Pmode (count_exp);
9140 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9141 if (TARGET_64BIT)
9142 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9143 destreg, countreg));
9144 else
9145 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9146 destreg, countreg));
9148 else if (count != 0
9149 && (align >= 8
9150 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9151 || optimize_size || count < (unsigned int)64))
9153 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9154 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9155 if (count & ~(size - 1))
9157 countreg = copy_to_mode_reg (counter_mode,
9158 GEN_INT ((count >> (size == 4 ? 2 : 3))
9159 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9160 countreg = ix86_zero_extend_to_Pmode (countreg);
9161 if (size == 4)
9163 if (TARGET_64BIT)
9164 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9165 destreg, countreg));
9166 else
9167 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9168 destreg, countreg));
9170 else
9171 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9172 destreg, countreg));
9174 if (size == 8 && (count & 0x04))
9175 emit_insn (gen_strsetsi (destreg,
9176 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9177 if (count & 0x02)
9178 emit_insn (gen_strsethi (destreg,
9179 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9180 if (count & 0x01)
9181 emit_insn (gen_strsetqi (destreg,
9182 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9184 else
9186 rtx countreg2;
9187 rtx label = NULL;
9189 /* In case we don't know anything about the alignment, default to the
9190 library version, since it is usually equally fast and results in
9191 shorter code. */
9192 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9193 return 0;
9195 if (TARGET_SINGLE_STRINGOP)
9196 emit_insn (gen_cld ());
9198 countreg2 = gen_reg_rtx (Pmode);
9199 countreg = copy_to_mode_reg (counter_mode, count_exp);
9200 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9202 if (count == 0
9203 && align < (TARGET_PENTIUMPRO && (count == 0
9204 || count >= (unsigned int)260)
9205 ? 8 : UNITS_PER_WORD))
9207 label = gen_label_rtx ();
9208 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9209 LEU, 0, counter_mode, 1, label);
9211 if (align <= 1)
9213 rtx label = ix86_expand_aligntest (destreg, 1);
9214 emit_insn (gen_strsetqi (destreg,
9215 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9216 ix86_adjust_counter (countreg, 1);
9217 emit_label (label);
9218 LABEL_NUSES (label) = 1;
9220 if (align <= 2)
9222 rtx label = ix86_expand_aligntest (destreg, 2);
9223 emit_insn (gen_strsethi (destreg,
9224 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9225 ix86_adjust_counter (countreg, 2);
9226 emit_label (label);
9227 LABEL_NUSES (label) = 1;
9229 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
9230 || count >= (unsigned int)260))
9232 rtx label = ix86_expand_aligntest (destreg, 4);
9233 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9234 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9235 : zeroreg)));
9236 ix86_adjust_counter (countreg, 4);
9237 emit_label (label);
9238 LABEL_NUSES (label) = 1;
9241 if (!TARGET_SINGLE_STRINGOP)
9242 emit_insn (gen_cld ());
9243 if (TARGET_64BIT)
9245 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9246 GEN_INT (3)));
9247 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9248 destreg, countreg2));
9250 else
9252 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9253 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9254 destreg, countreg2));
9257 if (label)
9259 emit_label (label);
9260 LABEL_NUSES (label) = 1;
9262 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9263 emit_insn (gen_strsetsi (destreg,
9264 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9265 if (TARGET_64BIT && (align <= 4 || count == 0))
9267 rtx label = ix86_expand_aligntest (countreg, 4);
9268 emit_insn (gen_strsetsi (destreg,
9269 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9270 emit_label (label);
9271 LABEL_NUSES (label) = 1;
9273 if (align > 2 && count != 0 && (count & 2))
9274 emit_insn (gen_strsethi (destreg,
9275 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9276 if (align <= 2 || count == 0)
9278 rtx label = ix86_expand_aligntest (countreg, 2);
9279 emit_insn (gen_strsethi (destreg,
9280 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9281 emit_label (label);
9282 LABEL_NUSES (label) = 1;
9284 if (align > 1 && count != 0 && (count & 1))
9285 emit_insn (gen_strsetqi (destreg,
9286 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9287 if (align <= 1 || count == 0)
9289 rtx label = ix86_expand_aligntest (countreg, 1);
9290 emit_insn (gen_strsetqi (destreg,
9291 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9292 emit_label (label);
9293 LABEL_NUSES (label) = 1;
9296 return 1;
9298 /* Expand strlen. */
9299 int
9300 ix86_expand_strlen (out, src, eoschar, align)
9301 rtx out, src, eoschar, align;
9303 rtx addr, scratch1, scratch2, scratch3, scratch4;
9305 /* The generic case of the strlen expander is long. Avoid expanding it
9306 unless TARGET_INLINE_ALL_STRINGOPS. */
9308 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9309 && !TARGET_INLINE_ALL_STRINGOPS
9310 && !optimize_size
9311 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
9312 return 0;
9314 addr = force_reg (Pmode, XEXP (src, 0));
9315 scratch1 = gen_reg_rtx (Pmode);
9317 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9318 && !optimize_size)
9320 /* Well, it seems that some optimizer does not combine a call like
9321 foo(strlen(bar), strlen(bar));
9322 when the move and the subtraction are done here. It calculates
9323 the length just once when these instructions are done inside
9324 output_strlen_unroll(). But since &bar[strlen(bar)] is often used,
9325 and this uses one fewer register for the lifetime of
9326 output_strlen_unroll(), this is better. */
9328 emit_move_insn (out, addr);
9330 ix86_expand_strlensi_unroll_1 (out, align);
9332 /* strlensi_unroll_1 returns the address of the zero at the end of
9333 the string, like memchr(), so compute the length by subtracting
9334 the start address. */
9335 if (TARGET_64BIT)
9336 emit_insn (gen_subdi3 (out, out, addr));
9337 else
9338 emit_insn (gen_subsi3 (out, out, addr));
9340 else
9342 scratch2 = gen_reg_rtx (Pmode);
9343 scratch3 = gen_reg_rtx (Pmode);
9344 scratch4 = force_reg (Pmode, constm1_rtx);
9346 emit_move_insn (scratch3, addr);
9347 eoschar = force_reg (QImode, eoschar);
9349 emit_insn (gen_cld ());
9350 if (TARGET_64BIT)
9352 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
9353 align, scratch4, scratch3));
9354 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
9355 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
9357 else
9359 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
9360 align, scratch4, scratch3));
9361 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
9362 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
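/* For example (illustrative length), for a 2-byte string the repnz
   scasb above scans 3 bytes including the terminator, leaving the
   count register at -1 - 3 = -4; the one's complement gives 3, and
   adding -1 yields the length, 2.  */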
9365 return 1;
9368 /* Expand the appropriate insns for doing strlen if not just doing
9369 repnz; scasb
9371 out = result, initialized with the start address
9372 align_rtx = alignment of the address.
9373 scratch = scratch register, initialized with the start address when
9374 not aligned, otherwise undefined
9376 This is just the body. It needs the initialisations mentioned above and
9377 some address computing at the end. These things are done in i386.md. */
9379 static void
9380 ix86_expand_strlensi_unroll_1 (out, align_rtx)
9381 rtx out, align_rtx;
9383 int align;
9384 rtx tmp;
9385 rtx align_2_label = NULL_RTX;
9386 rtx align_3_label = NULL_RTX;
9387 rtx align_4_label = gen_label_rtx ();
9388 rtx end_0_label = gen_label_rtx ();
9389 rtx mem;
9390 rtx tmpreg = gen_reg_rtx (SImode);
9391 rtx scratch = gen_reg_rtx (SImode);
9393 align = 0;
9394 if (GET_CODE (align_rtx) == CONST_INT)
9395 align = INTVAL (align_rtx);
9397 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
9399 /* Is there a known alignment and is it less than 4? */
9400 if (align < 4)
9402 rtx scratch1 = gen_reg_rtx (Pmode);
9403 emit_move_insn (scratch1, out);
9404 /* Is there a known alignment and is it not 2? */
9405 if (align != 2)
9407 align_3_label = gen_label_rtx (); /* Label when addr % 4 == 3 */
9408 align_2_label = gen_label_rtx (); /* Label when addr % 4 == 2 */
9410 /* Leave just the two lower bits. */
9411 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
9412 NULL_RTX, 0, OPTAB_WIDEN);
9414 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9415 Pmode, 1, align_4_label);
9416 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
9417 Pmode, 1, align_2_label);
9418 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
9419 Pmode, 1, align_3_label);
9421 else
9423 /* Since the alignment is 2, we have to check 2 or 0 bytes;
9424 check whether the address is aligned to 4 bytes. */
9426 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
9427 NULL_RTX, 0, OPTAB_WIDEN);
9429 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9430 Pmode, 1, align_4_label);
9433 mem = gen_rtx_MEM (QImode, out);
9435 /* Now compare the bytes. */
9437 /* Compare the first n unaligned bytes one byte at a time. */
9438 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9439 QImode, 1, end_0_label);
9441 /* Increment the address. */
9442 if (TARGET_64BIT)
9443 emit_insn (gen_adddi3 (out, out, const1_rtx));
9444 else
9445 emit_insn (gen_addsi3 (out, out, const1_rtx));
9447 /* Not needed with an alignment of 2. */
9448 if (align != 2)
9450 emit_label (align_2_label);
9452 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9453 end_0_label);
9455 if (TARGET_64BIT)
9456 emit_insn (gen_adddi3 (out, out, const1_rtx));
9457 else
9458 emit_insn (gen_addsi3 (out, out, const1_rtx));
9460 emit_label (align_3_label);
9463 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9464 end_0_label);
9466 if (TARGET_64BIT)
9467 emit_insn (gen_adddi3 (out, out, const1_rtx));
9468 else
9469 emit_insn (gen_addsi3 (out, out, const1_rtx));
9472 /* Generate a loop to check 4 bytes at a time. It is not a good idea
9473 to align this loop - it only makes programs bigger and does not
9474 help them run faster. */
9475 emit_label (align_4_label);
9477 mem = gen_rtx_MEM (SImode, out);
9478 emit_move_insn (scratch, mem);
9479 if (TARGET_64BIT)
9480 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
9481 else
9482 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
9484 /* This formula yields a nonzero result iff one of the bytes is zero.
9485 This saves three branches inside the loop and many cycles. */
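/* For illustration: with no incoming borrow, (b - 1) has its top bit
   set only when byte b is 0 or b >= 0x81, while ~b has its top bit set
   only when b <= 0x7f; both hold only for b == 0.  Since a borrow can
   start only at a zero byte, (x - 0x01010101) & ~x & 0x80808080 is
   nonzero exactly when some byte of x is zero.  */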
9487 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
9488 emit_insn (gen_one_cmplsi2 (scratch, scratch));
9489 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
9490 emit_insn (gen_andsi3 (tmpreg, tmpreg,
9491 GEN_INT (trunc_int_for_mode
9492 (0x80808080, SImode))));
9493 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
9494 align_4_label);
9496 if (TARGET_CMOVE)
9498 rtx reg = gen_reg_rtx (SImode);
9499 rtx reg2 = gen_reg_rtx (Pmode);
9500 emit_move_insn (reg, tmpreg);
9501 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
9503 /* If zero is not in the first two bytes, move two bytes forward. */
9504 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9505 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9506 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9507 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
9508 gen_rtx_IF_THEN_ELSE (SImode, tmp,
9509 reg,
9510 tmpreg)));
9511 /* Emit the lea manually to avoid clobbering the flags. */
9512 emit_insn (gen_rtx_SET (SImode, reg2,
9513 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
9515 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9516 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9517 emit_insn (gen_rtx_SET (VOIDmode, out,
9518 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
9519 reg2,
9520 out)));
9523 else
9525 rtx end_2_label = gen_label_rtx ();
9526 /* Is zero in the first two bytes? */
9528 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9529 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9530 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
9531 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9532 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
9533 pc_rtx);
9534 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9535 JUMP_LABEL (tmp) = end_2_label;
9537 /* Not in the first two. Move two bytes forward. */
9538 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
9539 if (TARGET_64BIT)
9540 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
9541 else
9542 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
9544 emit_label (end_2_label);
9548 /* Avoid branch in fixing the byte. */
9549 tmpreg = gen_lowpart (QImode, tmpreg);
9550 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
9551 if (TARGET_64BIT)
9552 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
9553 else
9554 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
9556 emit_label (end_0_label);
9559 /* Clear stack slot assignments remembered from previous functions.
9560 This is called from INIT_EXPANDERS once before RTL is emitted for each
9561 function. */
9563 static void
9564 ix86_init_machine_status (p)
9565 struct function *p;
9567 p->machine = (struct machine_function *)
9568 xcalloc (1, sizeof (struct machine_function));
9571 /* Mark machine specific bits of P for GC. */
9572 static void
9573 ix86_mark_machine_status (p)
9574 struct function *p;
9576 struct machine_function *machine = p->machine;
9577 enum machine_mode mode;
9578 int n;
9580 if (! machine)
9581 return;
9583 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
9584 mode = (enum machine_mode) ((int) mode + 1))
9585 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
9586 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
9589 static void
9590 ix86_free_machine_status (p)
9591 struct function *p;
9593 free (p->machine);
9594 p->machine = NULL;
9597 /* Return a MEM corresponding to a stack slot with mode MODE.
9598 Allocate a new slot if necessary.
9600 The RTL for a function can have several slots available: N is
9601 which slot to use. */
9603 rtx
9604 assign_386_stack_local (mode, n)
9605 enum machine_mode mode;
9606 int n;
9608 if (n < 0 || n >= MAX_386_STACK_LOCALS)
9609 abort ();
9611 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9612 ix86_stack_locals[(int) mode][n]
9613 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9615 return ix86_stack_locals[(int) mode][n];
9618 /* Calculate the length of the memory address in the instruction
9619 encoding. Does not include the one-byte modrm, opcode, or prefix. */
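/* For example (illustrative operands): (%eax) needs no bytes beyond
   the modrm and yields 0; 8(%ebp) needs a one-byte displacement and
   yields 1; a scaled-index form like foo(,%eax,4) needs the extra
   modrm byte plus a 4-byte displacement and yields 5.  */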
9621 static int
9622 memory_address_length (addr)
9623 rtx addr;
9625 struct ix86_address parts;
9626 rtx base, index, disp;
9627 int len;
9629 if (GET_CODE (addr) == PRE_DEC
9630 || GET_CODE (addr) == POST_INC
9631 || GET_CODE (addr) == PRE_MODIFY
9632 || GET_CODE (addr) == POST_MODIFY)
9633 return 0;
9635 if (! ix86_decompose_address (addr, &parts))
9636 abort ();
9638 base = parts.base;
9639 index = parts.index;
9640 disp = parts.disp;
9641 len = 0;
9643 /* Register Indirect. */
9644 if (base && !index && !disp)
9646 /* Special cases: ebp and esp need the two-byte modrm form. */
9647 if (addr == stack_pointer_rtx
9648 || addr == arg_pointer_rtx
9649 || addr == frame_pointer_rtx
9650 || addr == hard_frame_pointer_rtx)
9651 len = 1;
9654 /* Direct Addressing. */
9655 else if (disp && !base && !index)
9656 len = 4;
9658 else
9660 /* Find the length of the displacement constant. */
9661 if (disp)
9663 if (GET_CODE (disp) == CONST_INT
9664 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
9665 len = 1;
9666 else
9667 len = 4;
9670 /* An index requires the two-byte modrm form. */
9671 if (index)
9672 len += 1;
9675 return len;
9678 /* Compute the default value for the "length_immediate" attribute. When
9679 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
9680 int
9681 ix86_attr_length_immediate_default (insn, shortform)
9682 rtx insn;
9683 int shortform;
9685 int len = 0;
9686 int i;
9687 extract_insn_cached (insn);
9688 for (i = recog_data.n_operands - 1; i >= 0; --i)
9689 if (CONSTANT_P (recog_data.operand[i]))
9691 if (len)
9692 abort ();
9693 if (shortform
9694 && GET_CODE (recog_data.operand[i]) == CONST_INT
9695 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
9696 len = 1;
9697 else
9699 switch (get_attr_mode (insn))
9701 case MODE_QI:
9702 len += 1;
9703 break;
9704 case MODE_HI:
9705 len += 2;
9706 break;
9707 case MODE_SI:
9708 len += 4;
9709 break;
9710 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
9711 case MODE_DI:
9712 len += 4;
9713 break;
9714 default:
9715 fatal_insn ("unknown insn mode", insn);
9719 return len;
9721 /* Compute the default value for the "length_address" attribute. */
9722 int
9723 ix86_attr_length_address_default (insn)
9724 rtx insn;
9726 int i;
9727 extract_insn_cached (insn);
9728 for (i = recog_data.n_operands - 1; i >= 0; --i)
9729 if (GET_CODE (recog_data.operand[i]) == MEM)
9731 return memory_address_length (XEXP (recog_data.operand[i], 0));
9732 break;
9734 return 0;
9737 /* Return the maximum number of instructions a CPU can issue. */
9739 static int
9740 ix86_issue_rate ()
9742 switch (ix86_cpu)
9744 case PROCESSOR_PENTIUM:
9745 case PROCESSOR_K6:
9746 return 2;
9748 case PROCESSOR_PENTIUMPRO:
9749 case PROCESSOR_PENTIUM4:
9750 case PROCESSOR_ATHLON:
9751 return 3;
9753 default:
9754 return 1;
9758 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
9759 by DEP_INSN and nothing else that DEP_INSN sets. */
9761 static int
9762 ix86_flags_dependant (insn, dep_insn, insn_type)
9763 rtx insn, dep_insn;
9764 enum attr_type insn_type;
9766 rtx set, set2;
9768 /* Simplify the test for uninteresting insns. */
9769 if (insn_type != TYPE_SETCC
9770 && insn_type != TYPE_ICMOV
9771 && insn_type != TYPE_FCMOV
9772 && insn_type != TYPE_IBR)
9773 return 0;
9775 if ((set = single_set (dep_insn)) != 0)
9777 set = SET_DEST (set);
9778 set2 = NULL_RTX;
9780 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
9781 && XVECLEN (PATTERN (dep_insn), 0) == 2
9782 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
9783 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
9785 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
9786 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
9788 else
9789 return 0;
9791 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
9792 return 0;
9794 /* This test is true if the dependent insn reads the flags but
9795 not any other potentially set register. */
9796 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
9797 return 0;
9799 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
9800 return 0;
9802 return 1;
9805 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
9806 address with operands set by DEP_INSN. */
9808 static int
9809 ix86_agi_dependant (insn, dep_insn, insn_type)
9810 rtx insn, dep_insn;
9811 enum attr_type insn_type;
9813 rtx addr;
9815 if (insn_type == TYPE_LEA
9816 && TARGET_PENTIUM)
9818 addr = PATTERN (insn);
9819 if (GET_CODE (addr) == SET)
9821 else if (GET_CODE (addr) == PARALLEL
9822 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
9823 addr = XVECEXP (addr, 0, 0);
9824 else
9825 abort ();
9826 addr = SET_SRC (addr);
9828 else
9830 int i;
9831 extract_insn_cached (insn);
9832 for (i = recog_data.n_operands - 1; i >= 0; --i)
9833 if (GET_CODE (recog_data.operand[i]) == MEM)
9835 addr = XEXP (recog_data.operand[i], 0);
9836 goto found;
9838 return 0;
9839 found:;
9842 return modified_in_p (addr, dep_insn);
9845 static int
9846 ix86_adjust_cost (insn, link, dep_insn, cost)
9847 rtx insn, link, dep_insn;
9848 int cost;
9850 enum attr_type insn_type, dep_insn_type;
9851 enum attr_memory memory, dep_memory;
9852 rtx set, set2;
9853 int dep_insn_code_number;
9855 /* Anti and output dependencies have zero cost on all CPUs. */
9856 if (REG_NOTE_KIND (link) != 0)
9857 return 0;
9859 dep_insn_code_number = recog_memoized (dep_insn);
9861 /* If we can't recognize the insns, we can't really do anything. */
9862 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
9863 return cost;
9865 insn_type = get_attr_type (insn);
9866 dep_insn_type = get_attr_type (dep_insn);
9868 switch (ix86_cpu)
9870 case PROCESSOR_PENTIUM:
9871 /* Address Generation Interlock adds a cycle of latency. */
9872 if (ix86_agi_dependant (insn, dep_insn, insn_type))
9873 cost += 1;
9875 /* ??? Compares pair with jump/setcc. */
9876 if (ix86_flags_dependant (insn, dep_insn, insn_type))
9877 cost = 0;
9879 /* Floating point stores require the value to be ready one cycle earlier. */
9880 if (insn_type == TYPE_FMOV
9881 && get_attr_memory (insn) == MEMORY_STORE
9882 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9883 cost += 1;
9884 break;
9886 case PROCESSOR_PENTIUMPRO:
9887 memory = get_attr_memory (insn);
9888 dep_memory = get_attr_memory (dep_insn);
9890 /* Since we can't represent delayed latencies of load+operation,
9891 increase the cost here for non-imov insns. */
9892 if (dep_insn_type != TYPE_IMOV
9893 && dep_insn_type != TYPE_FMOV
9894 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
9895 cost += 1;
9897 /* INT->FP conversion is expensive. */
9898 if (get_attr_fp_int_src (dep_insn))
9899 cost += 5;
9901 /* There is one cycle extra latency between an FP op and a store. */
9902 if (insn_type == TYPE_FMOV
9903 && (set = single_set (dep_insn)) != NULL_RTX
9904 && (set2 = single_set (insn)) != NULL_RTX
9905 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
9906 && GET_CODE (SET_DEST (set2)) == MEM)
9907 cost += 1;
9909 /* Model the ability of the reorder buffer to hide the latency of a load
9910 by executing it in parallel with the previous instruction, in case the
9911 previous instruction is not needed to compute the address. */
9912 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
9913 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9915 /* Claim moves to take one cycle, as the core can issue one load
9916 at a time and the next load can start a cycle later. */
9917 if (dep_insn_type == TYPE_IMOV
9918 || dep_insn_type == TYPE_FMOV)
9919 cost = 1;
9920 else if (cost > 1)
9921 cost--;
9923 break;
9925 case PROCESSOR_K6:
9926 memory = get_attr_memory (insn);
9927 dep_memory = get_attr_memory (dep_insn);
9928 /* The esp dependency is resolved before the instruction is really
9929 finished. */
9930 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
9931 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
9932 return 1;
9934 /* Since we can't represent delayed latencies of load+operation,
9935 increase the cost here for non-imov insns. */
9936 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
9937 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
9939 /* INT->FP conversion is expensive. */
9940 if (get_attr_fp_int_src (dep_insn))
9941 cost += 5;
9943 /* Model the ability of the reorder buffer to hide the latency of a load
9944 by executing it in parallel with the previous instruction, in case the
9945 previous instruction is not needed to compute the address. */
9946 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
9947 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9949 /* Claim moves to take one cycle, as the core can issue one load
9950 at a time and the next load can start a cycle later. */
9951 if (dep_insn_type == TYPE_IMOV
9952 || dep_insn_type == TYPE_FMOV)
9953 cost = 1;
9954 else if (cost > 2)
9955 cost -= 2;
9956 else
9957 cost = 1;
9959 break;
9961 case PROCESSOR_ATHLON:
9962 memory = get_attr_memory (insn);
9963 dep_memory = get_attr_memory (dep_insn);
9965 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
9967 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
9968 cost += 2;
9969 else
9970 cost += 3;
9972 /* Model the ability of the reorder buffer to hide the latency of a load
9973 by executing it in parallel with the previous instruction, in case the
9974 previous instruction is not needed to compute the address. */
9975 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
9976 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9978 /* Claim moves to take one cycle, as the core can issue one load
9979 at a time and the next load can start a cycle later. */
9980 if (dep_insn_type == TYPE_IMOV
9981 || dep_insn_type == TYPE_FMOV)
9982 cost = 0;
9983 else if (cost >= 3)
9984 cost -= 3;
9985 else
9986 cost = 0;
9989 default:
9990 break;
9993 return cost;
9996 static union
9998 struct ppro_sched_data
10000 rtx decode[3];
10001 int issued_this_cycle;
10002 } ppro;
10003 } ix86_sched_data;
10005 static int
10006 ix86_safe_length (insn)
10007 rtx insn;
10009 if (recog_memoized (insn) >= 0)
10010 return get_attr_length (insn);
10011 else
10012 return 128;
10015 static int
10016 ix86_safe_length_prefix (insn)
10017 rtx insn;
10019 if (recog_memoized (insn) >= 0)
10020 return get_attr_length (insn);
10021 else
10022 return 0;
10025 static enum attr_memory
10026 ix86_safe_memory (insn)
10027 rtx insn;
10029 if (recog_memoized (insn) >= 0)
10030 return get_attr_memory (insn);
10031 else
10032 return MEMORY_UNKNOWN;
10035 static enum attr_pent_pair
10036 ix86_safe_pent_pair (insn)
10037 rtx insn;
10039 if (recog_memoized (insn) >= 0)
10040 return get_attr_pent_pair (insn);
10041 else
10042 return PENT_PAIR_NP;
10045 static enum attr_ppro_uops
10046 ix86_safe_ppro_uops (insn)
10047 rtx insn;
10049 if (recog_memoized (insn) >= 0)
10050 return get_attr_ppro_uops (insn);
10051 else
10052 return PPRO_UOPS_MANY;
10055 static void
10056 ix86_dump_ppro_packet (dump)
10057 FILE *dump;
10059 if (ix86_sched_data.ppro.decode[0])
10061 fprintf (dump, "PPRO packet: %d",
10062 INSN_UID (ix86_sched_data.ppro.decode[0]));
10063 if (ix86_sched_data.ppro.decode[1])
10064 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10065 if (ix86_sched_data.ppro.decode[2])
10066 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10067 fputc ('\n', dump);

/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ix86_sched_init (dump, sched_verbose, veclen)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int veclen ATTRIBUTE_UNUSED;
{
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
}

/* Shift INSN to SLOT, and shift everything else down.  */

static void
ix86_reorder_insn (insnp, slot)
     rtx *insnp, *slot;
{
  if (insnp != slot)
    {
      rtx insn = *insnp;
      do
        insnp[0] = insnp[1];
      while (++insnp != slot);
      *insnp = insn;
    }
}
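
/* Illustration (annotation only): if the ready vector is { A, B, C, D }
   with D in the highest-priority slot, then ix86_reorder_insn (&B, &D)
   rotates it to { A, C, D, B }: B lands in D's old slot while C and D
   each shift down one place.  */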

/* Find an instruction with the given pairability and a minimal number of
   cycles lost to the fact that the CPU waits for both pipelines to finish
   before reading the next instructions.  Also take care that the two
   instructions together cannot exceed 7 bytes.  */

static rtx *
ix86_pent_find_pair (e_ready, ready, type, first)
     rtx *e_ready;
     rtx *ready;
     enum attr_pent_pair type;
     rtx first;
{
  int mincycles, cycles;
  enum attr_pent_pair tmp;
  enum attr_memory memory;
  rtx *insnp, *bestinsnp = NULL;

  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
    return NULL;

  memory = ix86_safe_memory (first);
  cycles = result_ready_cost (first);
  mincycles = INT_MAX;

  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
        && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
      {
        enum attr_memory second_memory;
        int secondcycles, currentcycles;

        second_memory = ix86_safe_memory (*insnp);
        secondcycles = result_ready_cost (*insnp);
        currentcycles = abs (cycles - secondcycles);

        if (secondcycles >= 1 && cycles >= 1)
          {
            /* Two read/modify/write instructions together take two
               cycles longer.  */
            if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
              currentcycles += 2;

            /* A read/modify/write instruction followed by a read/modify
               instruction takes one cycle longer.  */
            if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
                && tmp != PENT_PAIR_UV
                && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
              currentcycles += 1;
          }
        if (currentcycles < mincycles)
          bestinsnp = insnp, mincycles = currentcycles;
      }

  return bestinsnp;
}
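
/* Worked example (annotation only): if FIRST is ready after 3 cycles and
   two candidates of the requested pairing are ready after 3 and 5 cycles,
   their base penalties are |3 - 3| = 0 and |3 - 5| = 2, so the first
   candidate is chosen unless the read/modify/write adjustments above tip
   the balance.  */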

/* Subroutines of ix86_sched_reorder.  */

static void
ix86_sched_reorder_pentium (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  enum attr_pent_pair pair1, pair2;
  rtx *insnp;

  /* This wouldn't be necessary if Haifa knew that static insn ordering
     is important to which pipe an insn is issued to.  So we have to make
     some minor rearrangements.  */

  pair1 = ix86_safe_pent_pair (*e_ready);

  /* If the first insn is non-pairable, let it be.  */
  if (pair1 == PENT_PAIR_NP)
    return;

  pair2 = PENT_PAIR_NP;
  insnp = 0;

  /* If the first insn is UV or PV pairable, search for a PU
     insn to go with.  */
  if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
    {
      insnp = ix86_pent_find_pair (e_ready - 1, ready,
                                   PENT_PAIR_PU, *e_ready);
      if (insnp)
        pair2 = PENT_PAIR_PU;
    }

  /* If the first insn is PU or UV pairable, search for a PV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP
      && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
    {
      insnp = ix86_pent_find_pair (e_ready - 1, ready,
                                   PENT_PAIR_PV, *e_ready);
      if (insnp)
        pair2 = PENT_PAIR_PV;
    }

  /* If the first insn is pairable, search for a UV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP)
    {
      insnp = ix86_pent_find_pair (e_ready - 1, ready,
                                   PENT_PAIR_UV, *e_ready);
      if (insnp)
        pair2 = PENT_PAIR_UV;
    }

  if (pair2 == PENT_PAIR_NP)
    return;

  /* Found something!  Decide if we need to swap the order.  */
  if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
      || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
          && ix86_safe_memory (*e_ready) == MEMORY_BOTH
          && ix86_safe_memory (*insnp) == MEMORY_LOAD))
    ix86_reorder_insn (insnp, e_ready);
  else
    ix86_reorder_insn (insnp, e_ready - 1);
}

static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from the last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we have a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
        {
          decode[0] = *insnp;
          goto ppro_done;
        }

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
        {
          if (insnp == ready)
            break;
          cur_uops = ix86_safe_ppro_uops (*--insnp);
        }

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
        ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
        if (ready >= e_ready)
          goto ppro_done;

        insnp = e_ready;
        cur_uops = ix86_safe_ppro_uops (*insnp);
        while (cur_uops != PPRO_UOPS_ONE)
          {
            if (insnp == ready)
              break;
            cur_uops = ix86_safe_ppro_uops (*--insnp);
          }

        /* Found one.  Move it to the head of the queue and issue it.  */
        if (cur_uops == PPRO_UOPS_ONE)
          {
            ix86_reorder_insn (insnp, e_ready);
            decode[i] = *e_ready--;
            issued_this_cycle++;
            continue;
          }

        /* ??? Didn't find one.  Ideally, here we would do a lazy split
           of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}
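
/* The shape this aims for (our reading of the code, not a hardware
   reference): per decode "cycle", decode[0] may hold an insn of any
   complexity, while decode[1] and decode[2] accept only single-uop
   insns, approximating the PPro's one complex plus two simple
   decoders.  */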

/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

static int
ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *n_readyp;
     int clock_var ATTRIBUTE_UNUSED;
{
  int n_ready = *n_readyp;
  rtx *e_ready = ready + n_ready - 1;

  if (n_ready < 2)
    goto out;

  switch (ix86_cpu)
    {
    default:
      break;

    case PROCESSOR_PENTIUM:
      ix86_sched_reorder_pentium (ready, e_ready);
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

 out:
  return ix86_issue_rate ();
}

/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
        enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

        if (uops == PPRO_UOPS_MANY)
          {
            if (sched_verbose)
              ix86_dump_ppro_packet (dump);
            ix86_sched_data.ppro.decode[0] = insn;
            ix86_sched_data.ppro.decode[1] = NULL;
            ix86_sched_data.ppro.decode[2] = NULL;
            if (sched_verbose)
              ix86_dump_ppro_packet (dump);
            ix86_sched_data.ppro.decode[0] = NULL;
          }
        else if (uops == PPRO_UOPS_FEW)
          {
            if (sched_verbose)
              ix86_dump_ppro_packet (dump);
            ix86_sched_data.ppro.decode[0] = insn;
            ix86_sched_data.ppro.decode[1] = NULL;
            ix86_sched_data.ppro.decode[2] = NULL;
          }
        else
          {
            for (i = 0; i < 3; ++i)
              if (ix86_sched_data.ppro.decode[i] == NULL)
                {
                  ix86_sched_data.ppro.decode[i] = insn;
                  break;
                }
            if (i == 3)
              abort ();
            if (i == 2)
              {
                if (sched_verbose)
                  ix86_dump_ppro_packet (dump);
                ix86_sched_data.ppro.decode[0] = NULL;
                ix86_sched_data.ppro.decode[1] = NULL;
                ix86_sched_data.ppro.decode[2] = NULL;
              }
          }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}

/* Walk through INSNS and look for MEM references whose address is DSTREG or
   SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
   appropriate.  */

void
ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
     rtx insns;
     rtx dstref, srcref, dstreg, srcreg;
{
  rtx insn;

  for (insn = insns; insn != 0; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
                                 dstreg, srcreg);
}

/* Subroutine of above to actually do the updating by recursively walking
   the rtx.  */

static void
ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx x;
     rtx dstref, srcref, dstreg, srcreg;
{
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);
  int i, j;

  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
    {
      if (*format_ptr == 'e')
        ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
                                   dstreg, srcreg);
      else if (*format_ptr == 'E')
        for (j = XVECLEN (x, i) - 1; j >= 0; j--)
          ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
                                     dstreg, srcreg);
    }
}

/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (exp, align)
     tree exp;
     int align;
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
        return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
           && align < 256)
    return 256;

  return align;
}
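
/* Example (annotation only): a DFmode constant such as a double literal
   gets its alignment raised to 64 bits, and a string constant of 31 or
   more bytes to 256 bits, even when the type alone would ask for less.  */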

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (type, align)
     tree type;
     int align;
{
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
          || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }

  return align;
}
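
/* Example (annotation only): for "static double a[4];" the aggregate is
   256 bits, so the first test above returns 256; a smaller array of
   DFmode elements would still be raised to 64 by the ARRAY_TYPE case.  */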

/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (type, align)
     tree type;
     int align;
{
  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }
  return align;
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */

void
x86_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;
{
  if (!TARGET_64BIT)
    {
      /* Compute the offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
                               plus_constant (tramp, 10),
                               NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
                      GEN_INT (trunc_int_for_mode (0xb9, QImode)));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
                      GEN_INT (trunc_int_for_mode (0xe9, QImode)));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load the address using the shorter movl instead of movabs.
         We may want to support movq for kernel mode, but the kernel does
         not use trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
        {
          fnaddr = copy_to_mode_reg (DImode, fnaddr);
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                          GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
          emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
                          gen_lowpart (SImode, fnaddr));
          offset += 6;
        }
      else
        {
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                          GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
          emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
                          fnaddr);
          offset += 10;
        }
      /* Load the static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                      GEN_INT (trunc_int_for_mode (0xba49, HImode)));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
                      cxt);
      offset += 10;
      /* Jump to r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                      GEN_INT (trunc_int_for_mode (0xff49, HImode)));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
                      GEN_INT (trunc_int_for_mode (0xe3, QImode)));
      offset += 3;
      if (offset > TRAMPOLINE_SIZE)
        abort ();
    }
}
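
/* Byte layout emitted above (annotation; the HImode constants are stored
   little-endian, so e.g. 0xbb41 becomes the bytes 41 bb):

     32-bit:  b9 <cxt:4>        movl   $cxt, %ecx
              e9 <disp:4>       jmp    fnaddr (pc-relative)

     64-bit:  41 bb <fnaddr:4>  movl   $fnaddr, %r11d  (short form), or
              49 bb <fnaddr:8>  movabs $fnaddr, %r11
              49 ba <cxt:8>     movabs $cxt, %r10
              49 ff e3          jmp    *%r11  */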

#define def_builtin(MASK, NAME, TYPE, CODE)                       \
do {                                                              \
  if ((MASK) & target_flags)                                      \
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
} while (0)

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};

static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
};
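
/* How to read these entries (annotation): COMPARISON and FLAG together
   select the operation.  For example, __builtin_ia32_comigt has no direct
   GT form in the table; it is entered as { LT, 1 }, and the flag makes
   ix86_expand_sse_comi below swap the operands so that a > b is tested
   as b < a.  */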

static const struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  */
  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },

  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { MASK_SSE, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
};

static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
};

void
ix86_init_builtins ()
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */

void
ix86_init_mmx_sse_builtins ()
{
  const struct builtin_description * d;
  size_t i;
  tree endlink = void_list_node;

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE,
                                                 V4SF_type_node,
                                                 endlink)));
  tree v4si_ftype_v4sf_v4sf
    = build_function_type (V4SI_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE,
                                                 V4SF_type_node,
                                                 endlink)));
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      endlink));
  tree int_ftype_v8qi
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      endlink));
  tree int_ftype_v2si
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      endlink));
  tree v2si_ftype_int
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, integer_type_node,
                                      endlink));
  tree v4sf_ftype_v4sf_int
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v4sf_ftype_v4sf_v2si
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, V2SI_type_node,
                                                 endlink)));
  tree int_ftype_v4hi_int
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_int_int
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type (V8QI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  tree v4hi_ftype_v2si_v2si
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, V2SI_type_node,
                                                 endlink)));
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, V4SF_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  tree v4hi_ftype_v8qi_v8qi
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, V8QI_type_node,
                                                 endlink)));
  tree v2si_ftype_v4hi_v4hi
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_int
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_di
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_integer_type_node,
                                                 endlink)));
  tree v2si_ftype_v2si_di
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_integer_type_node,
                                                 endlink)));
  tree void_ftype_void
    = build_function_type (void_type_node, endlink);
  tree void_ftype_pchar_int
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, pchar_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree void_ftype_unsigned
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, unsigned_type_node,
                                      endlink));
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, endlink);
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree ti_ftype_void
    = build_function_type (intTI_type_node, endlink);
  tree v2si_ftype_v4sf
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      endlink));
  /* Loads/stores.  */
  tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
                                  tree_cons (NULL_TREE, V8QI_type_node,
                                             tree_cons (NULL_TREE,
                                                        pchar_type_node,
                                                        endlink)));
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type (void_type_node, maskmovq_args);
  tree v4sf_ftype_pfloat
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, pfloat_type_node,
                                      endlink));
  tree v4sf_ftype_float
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, float_type_node,
                                      endlink));
  tree v4sf_ftype_float_float_float_float
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, float_type_node,
                                      tree_cons (NULL_TREE, float_type_node,
                                                 tree_cons (NULL_TREE,
                                                            float_type_node,
                                                            tree_cons (NULL_TREE,
                                                                       float_type_node,
                                                                       endlink)))));
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, pv2si_type_node,
                                                 endlink)));
  tree void_ftype_pv2si_v4sf
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, pv2si_type_node,
                                      tree_cons (NULL_TREE, V4SF_type_node,
                                                 endlink)));
  tree void_ftype_pfloat_v4sf
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, pfloat_type_node,
                                      tree_cons (NULL_TREE, V4SF_type_node,
                                                 endlink)));
  tree void_ftype_pdi_di
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, pdi_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_unsigned_type_node,
                                                 endlink)));
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      endlink));

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, V4SF_type_node,
                                                 endlink)));
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, V8QI_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, V2SI_type_node,
                                                 endlink)));
  tree ti_ftype_ti_ti
    = build_function_type (intTI_type_node,
                           tree_cons (NULL_TREE, intTI_type_node,
                                      tree_cons (NULL_TREE, intTI_type_node,
                                                 endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
                           tree_cons (NULL_TREE, long_long_unsigned_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_unsigned_type_node,
                                                 endlink)));

  tree v2si_ftype_v2sf
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SF_type_node,
                                      endlink));
  tree v2sf_ftype_v2si
    = build_function_type (V2SF_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      endlink));
  tree v2si_ftype_v2si
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      endlink));
  tree v2sf_ftype_v2sf
    = build_function_type (V2SF_type_node,
                           tree_cons (NULL_TREE, V2SF_type_node,
                                      endlink));
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type (V2SF_type_node,
                           tree_cons (NULL_TREE, V2SF_type_node,
                                      tree_cons (NULL_TREE,
                                                 V2SF_type_node,
                                                 endlink)));
  tree v2si_ftype_v2sf_v2sf
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SF_type_node,
                                      tree_cons (NULL_TREE,
                                                 V2SF_type_node,
                                                 endlink)));

  tree void_ftype_pchar
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, pchar_type_node,
                                      endlink));

  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case TImode:
          type = ti_ftype_ti_ti;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          abort ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
          || d->icode == CODE_FOR_maskncmpv4sf3
          || d->icode == CODE_FOR_vmmaskcmpv4sf3
          || d->icode == CODE_FOR_vmmaskncmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      def_builtin (d->mask, d->name, type, d->code);
    }

  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
  def_builtin (MASK_MMX, "__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
    def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
  def_builtin (MASK_3DNOW, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar, IX86_BUILTIN_PREFETCH_3DNOW);
  def_builtin (MASK_3DNOW, "__builtin_ia32_prefetchw", void_ftype_pchar, IX86_BUILTIN_PREFETCHW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  /* Composite intrinsics.  */
  def_builtin (MASK_SSE, "__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
  def_builtin (MASK_SSE, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
  def_builtin (MASK_SSE, "__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
  def_builtin (MASK_SSE, "__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
  def_builtin (MASK_SSE, "__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
}
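
/* Usage sketch (annotation, not part of this file): with -msse in effect,
   the definitions above let user code call the builtins directly, e.g.

     typedef int __v4sf __attribute__ ((mode (V4SF)));
     __v4sf add (__v4sf a, __v4sf b)
       { return __builtin_ia32_addps (a, b); }

   The typedef spelling is an assumption for illustration; only the
   builtins themselves are defined here.  */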

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (x, mode)
     rtx x;
     enum machine_mode mode;
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
                              : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrti (mode == TImode ? x
                              : gen_rtx_SUBREG (TImode, x, 0)));
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
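
/* Flow example (annotation): for __builtin_ia32_addps, ICODE is
   CODE_FOR_addv4sf3 and all three modes are V4SFmode, so error-induced
   const0_rtx operands are replaced by cleared vectors, both operands are
   forced into registers the predicates accept, and a single addv4sf3
   pattern is emitted with TARGET as the destination.  */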

/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (icode, arglist, shuffle)
     enum insn_code icode;
     tree arglist;
     int shuffle;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);
  if (shuffle >= 0)
    emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (icode, arglist, target, do_load)
     enum insn_code icode;
     tree arglist;
     rtx target;
     int do_load;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
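
/* Example (annotation): __builtin_ia32_cmpgtps is listed in bdesc_2arg
   as { LT, 1 }.  The flag triggers the swap above, so a > b is computed
   as b < a with the same maskcmpv4sf3 pattern; only the operand order
   differs from the unswapped comparisons.  */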

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          gen_rtx_REG (CCmode, FLAGS_REG),
                                          const0_rtx)));

  return target;
}
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2, arg3;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;
    case IX86_BUILTIN_M_FROM_INT:
      target = gen_reg_rtx (DImode);
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
      return target;

    case IX86_BUILTIN_M_TO_INT:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      op0 = copy_to_mode_reg (DImode, op0);
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
      return target;

    case IX86_BUILTIN_PEXTRW:
      icode = CODE_FOR_mmx_pextrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_PINSRW:
      icode = CODE_FOR_mmx_pinsrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
      icode = CODE_FOR_mmx_maskmovq;
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;
    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
      icode = (fcode == IX86_BUILTIN_LOADHPS
	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS
	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;
    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_PREFETCH:
      icode = CODE_FOR_prefetch_sse;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}

      op0 = copy_to_mode_reg (Pmode, op0);
      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_SHUFPS:
      icode = CODE_FOR_sse_shufps;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
      icode = CODE_FOR_mmx_pshufw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PREFETCH_3DNOW:
    case IX86_BUILTIN_PREFETCHW:
      icode = CODE_FOR_prefetch_3dnow;
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = (fcode == IX86_BUILTIN_PREFETCH_3DNOW ? const0_rtx : const1_rtx);
      mode0 = insn_data[icode].operand[0].mode;
      pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0), op1);
      if (! pat)
	return NULL_RTX;
      emit_insn (pat);
      return NULL_RTX;

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
      /* Composite intrinsics.  */
    case IX86_BUILTIN_SETPS1:
      target = assign_386_stack_local (SFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, SFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_loadss (op0, adjust_address (target, V4SFmode, 0)));
      emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
      return op0;

    case IX86_BUILTIN_SETPS:
      target = assign_386_stack_local (V4SFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
      emit_move_insn (adjust_address (target, SFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, SFmode, 4),
		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, SFmode, 8),
		      expand_expr (arg2, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, SFmode, 12),
		      expand_expr (arg3, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_movaps (op0, target));
      return op0;

    case IX86_BUILTIN_CLRPS:
      target = gen_reg_rtx (TImode);
      emit_insn (gen_sse_clrti (target));
      return target;

    case IX86_BUILTIN_LOADRPS:
      target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
					 gen_reg_rtx (V4SFmode), 1);
      emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
      return target;

    case IX86_BUILTIN_LOADPS1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
					 gen_reg_rtx (V4SFmode), 1);
      emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPS1:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
    case IX86_BUILTIN_STORERPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    default:
      break;
    }
  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}

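/* A sketch of how the table-driven fallthrough is reached (illustrative
   only): a user-level call such as

     __m128 f (__m128 a, __m128 b) { return _mm_add_ps (a, b); }

   arrives here as __builtin_ia32_addps.  Its fcode matches no special
   case in the switch above, so the bdesc_2arg scan finds the entry and
   hands its insn code to ix86_expand_binop_builtin, which emits the
   addps pattern.  Only builtins needing unusual operand handling get a
   dedicated case.  */
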
/* Store OPERAND to memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (DImode,
				      gen_rtx_PRE_DEC (DImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[1]));
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (GET_MODE (operand),
				      gen_rtx_PRE_DEC (SImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}

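/* Worked example (a sketch of the 32-bit DImode path above): the value
   is split and pushed high word first,

     (set (mem:SI (pre_dec:SI (reg sp))) (high word))
     (set (mem:SI (pre_dec:SI (reg sp))) (low word))

   so the low word ends up at the lower address and the returned
   (mem:DI (reg sp)) reads the value back in little-endian order.  */
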
/* Free the operand from memory.  */
void
ix86_free_from_memory (mode)
     enum machine_mode mode;
{
  if (!TARGET_64BIT || !TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
	size = 2;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be
	 converted to a pop or add instruction if registers are
	 available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}

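/* The two routines above are meant to be used as a pair around a spill
   needed after reload, e.g. (a sketch):

     rtx slot = ix86_force_to_memory (HImode, operand);
     ... emit insns that read the stack slot ...
     ix86_free_from_memory (HImode);

   Both agree that HImode is widened to SImode unless
   TARGET_PARTIAL_REG_STALL, so the sizes pushed and deallocated
   match.  */
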
/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (x, class)
     rtx x;
     enum reg_class class;
{
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
	return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
	{
	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
	  if (MAYBE_SSE_CLASS_P (class))
	    return (reg_class_subset_p (class, GENERAL_REGS)
		    ? GENERAL_REGS : FLOAT_REGS);
	  else
	    return class;
	}
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
	return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}

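/* Some consequences of the preferences above (illustrative):

     (const_double:SF 1.0) for FLOAT_REGS -> FLOAT_REGS (fld1 exists)
     (const_double:DF 2.5) for FLOAT_REGS -> NO_REGS (use constant pool)
     any CONST_DOUBLE for SSE_REGS        -> NO_REGS
     (reg:QI ...) for GENERAL_REGS        -> Q_REGS  */
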
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We
   avoid this by never combining those units in a single alternative in
   the machine description.  Ensure that this constraint holds to avoid
   unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST,
   so do not enforce these sanity checks.  */
int
ix86_secondary_memory_needed (class1, class2, mode, strict)
     enum reg_class class1, class2;
     enum machine_mode mode;
     int strict;
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
	abort ();
      else
	return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
	  || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
	      && (mode) != SImode)
	  || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
	      && (mode) != SImode));
}

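/* Examples (illustrative): GENERAL_REGS <-> FLOAT_REGS always needs a
   memory intermediate, as does GENERAL_REGS <-> SSE_REGS for anything
   wider than SImode.  An SImode GENERAL_REGS <-> SSE_REGS copy does
   not, since 32 bits can move between the units directly.  */
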
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same
   as TO; on some machines it is expensive to move between registers if
   they are not general registers.  */
int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute the cost of the store
     followed by the load.  When copying from a general-purpose register
     we may emit multiple stores followed by a single load, causing a
     memory-size-mismatch stall.  Count this as an arbitrarily high cost
     of 20.  */
  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int add_cost = 0;
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	add_cost = 20;
      return (MEMORY_MOVE_COST (mode, class1, 0)
	      + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
    }
  /* Moves between the SSE/MMX units and the integer unit are
     expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}

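/* Worked example (a sketch): for a DFmode copy between GENERAL_REGS and
   FLOAT_REGS, secondary memory is required, so the cost is a store plus
   a load from MEMORY_MOVE_COST.  Going from the two-word general-register
   side, CLASS_MAX_NREGS is larger for CLASS1, so the extra 20 is added
   to model the store-forwarding size mismatch.  */
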
/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* The flags registers, and only the flags registers, can hold CCmode
     values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return VALID_SSE_REG_MODE (mode);
  if (MMX_REGNO_P (regno))
    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
  /* We handle both integers and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care with QImode values: they can live in non-QI regs, but
     then they cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}

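/* For instance (illustrative): a QImode value lives freely in the four
   QI-capable registers (regno < 4).  In the remaining general registers
   it is allowed only in 64-bit mode, during reload, or when
   TARGET_PARTIAL_REG_STALL is off, since ia32 has no byte access to
   those registers.  */
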
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Also model the increased cost of moving QImode registers in
   non-Q_REGS classes.  */
int
ix86_memory_move_cost (mode, class, in)
     enum machine_mode mode;
     enum reg_class class;
     int in;
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	case TFmode:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
	return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
		: ix86_cost->movzbl_load);
      else
	return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
		: ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
	      * (int) GET_MODE_SIZE (mode) / 4);
    }
}

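/* Worked example (a sketch): loading DImode into GENERAL_REGS falls
   through to the default case and is priced as two 32-bit loads,

     int_load[2] * GET_MODE_SIZE (DImode) / 4  ==  int_load[2] * 2

   while a QImode store to a non-Q class pays int_store[0] + 4 for the
   awkward byte access.  */
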
#ifdef DO_GLOBAL_CTORS_BODY
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif