/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
   Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,		/* cost of an add instruction */
  3,		/* cost of a lea instruction */
  2,		/* variable shift costs */
  3,		/* constant shift costs */
  3,		/* cost of starting a multiply */
  0,		/* cost of multiply per each bit set */
  3,		/* cost of a divide/mod */
  3,		/* cost of movsx */
  3,		/* cost of movzx */
  0,		/* "large" insn */
  2,		/* MOVE_RATIO */
  2,		/* cost for loading QImode using movzbl */
  {2, 2, 2},	/* cost of loading integer registers
		   in QImode, HImode and SImode.
		   Relative to reg-reg move (2).  */
  {2, 2, 2},	/* cost of storing integer registers */
  2,		/* cost of reg,reg fld/fst */
  {2, 2, 2},	/* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {2, 2, 2},	/* cost of storing fp registers
		   in SFmode, DFmode and XFmode */
  3,		/* cost of moving MMX register */
  {3, 3},	/* cost of loading MMX registers
		   in SImode and DImode */
  {3, 3},	/* cost of storing MMX registers
		   in SImode and DImode */
  3,		/* cost of moving SSE register */
  {3, 3, 3},	/* cost of loading SSE registers
		   in SImode, DImode and TImode */
  {3, 3, 3},	/* cost of storing SSE registers
		   in SImode, DImode and TImode */
  3,		/* MMX or SSE register to integer */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,		/* cost of an add instruction */
  1,		/* cost of a lea instruction */
  3,		/* variable shift costs */
  2,		/* constant shift costs */
  6,		/* cost of starting a multiply */
  1,		/* cost of multiply per each bit set */
  23,		/* cost of a divide/mod */
  3,		/* cost of movsx */
  2,		/* cost of movzx */
  15,		/* "large" insn */
  3,		/* MOVE_RATIO */
  4,		/* cost for loading QImode using movzbl */
  {2, 4, 2},	/* cost of loading integer registers
		   in QImode, HImode and SImode.
		   Relative to reg-reg move (2).  */
  {2, 4, 2},	/* cost of storing integer registers */
  2,		/* cost of reg,reg fld/fst */
  {8, 8, 8},	/* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {8, 8, 8},	/* cost of storing fp registers
		   in SFmode, DFmode and XFmode */
  2,		/* cost of moving MMX register */
  {4, 8},	/* cost of loading MMX registers
		   in SImode and DImode */
  {4, 8},	/* cost of storing MMX registers
		   in SImode and DImode */
  2,		/* cost of moving SSE register */
  {4, 8, 16},	/* cost of loading SSE registers
		   in SImode, DImode and TImode */
  {4, 8, 16},	/* cost of storing SSE registers
		   in SImode, DImode and TImode */
  3,		/* MMX or SSE register to integer */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,		/* cost of an add instruction */
  1,		/* cost of a lea instruction */
  3,		/* variable shift costs */
  2,		/* constant shift costs */
  12,		/* cost of starting a multiply */
  1,		/* cost of multiply per each bit set */
  40,		/* cost of a divide/mod */
  3,		/* cost of movsx */
  2,		/* cost of movzx */
  15,		/* "large" insn */
  3,		/* MOVE_RATIO */
  4,		/* cost for loading QImode using movzbl */
  {2, 4, 2},	/* cost of loading integer registers
		   in QImode, HImode and SImode.
		   Relative to reg-reg move (2).  */
  {2, 4, 2},	/* cost of storing integer registers */
  2,		/* cost of reg,reg fld/fst */
  {8, 8, 8},	/* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {8, 8, 8},	/* cost of storing fp registers
		   in SFmode, DFmode and XFmode */
  2,		/* cost of moving MMX register */
  {4, 8},	/* cost of loading MMX registers
		   in SImode and DImode */
  {4, 8},	/* cost of storing MMX registers
		   in SImode and DImode */
  2,		/* cost of moving SSE register */
  {4, 8, 16},	/* cost of loading SSE registers
		   in SImode, DImode and TImode */
  {4, 8, 16},	/* cost of storing SSE registers
		   in SImode, DImode and TImode */
  3		/* MMX or SSE register to integer */
};

static const
struct processor_costs pentium_cost = {
  1,		/* cost of an add instruction */
  1,		/* cost of a lea instruction */
  4,		/* variable shift costs */
  1,		/* constant shift costs */
  11,		/* cost of starting a multiply */
  0,		/* cost of multiply per each bit set */
  25,		/* cost of a divide/mod */
  3,		/* cost of movsx */
  2,		/* cost of movzx */
  8,		/* "large" insn */
  6,		/* MOVE_RATIO */
  6,		/* cost for loading QImode using movzbl */
  {2, 4, 2},	/* cost of loading integer registers
		   in QImode, HImode and SImode.
		   Relative to reg-reg move (2).  */
  {2, 4, 2},	/* cost of storing integer registers */
  2,		/* cost of reg,reg fld/fst */
  {2, 2, 6},	/* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {4, 4, 6},	/* cost of storing fp registers
		   in SFmode, DFmode and XFmode */
  8,		/* cost of moving MMX register */
  {8, 8},	/* cost of loading MMX registers
		   in SImode and DImode */
  {8, 8},	/* cost of storing MMX registers
		   in SImode and DImode */
  2,		/* cost of moving SSE register */
  {4, 8, 16},	/* cost of loading SSE registers
		   in SImode, DImode and TImode */
  {4, 8, 16},	/* cost of storing SSE registers
		   in SImode, DImode and TImode */
  3		/* MMX or SSE register to integer */
};

static const
struct processor_costs pentiumpro_cost = {
  1,		/* cost of an add instruction */
  1,		/* cost of a lea instruction */
  1,		/* variable shift costs */
  1,		/* constant shift costs */
  4,		/* cost of starting a multiply */
  0,		/* cost of multiply per each bit set */
  17,		/* cost of a divide/mod */
  1,		/* cost of movsx */
  1,		/* cost of movzx */
  8,		/* "large" insn */
  6,		/* MOVE_RATIO */
  2,		/* cost for loading QImode using movzbl */
  {4, 4, 4},	/* cost of loading integer registers
		   in QImode, HImode and SImode.
		   Relative to reg-reg move (2).  */
  {2, 2, 2},	/* cost of storing integer registers */
  2,		/* cost of reg,reg fld/fst */
  {2, 2, 6},	/* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {4, 4, 6},	/* cost of storing fp registers
		   in SFmode, DFmode and XFmode */
  2,		/* cost of moving MMX register */
  {2, 2},	/* cost of loading MMX registers
		   in SImode and DImode */
  {2, 2},	/* cost of storing MMX registers
		   in SImode and DImode */
  2,		/* cost of moving SSE register */
  {2, 2, 8},	/* cost of loading SSE registers
		   in SImode, DImode and TImode */
  {2, 2, 8},	/* cost of storing SSE registers
		   in SImode, DImode and TImode */
  3		/* MMX or SSE register to integer */
};

static const
struct processor_costs k6_cost = {
  1,		/* cost of an add instruction */
  2,		/* cost of a lea instruction */
  1,		/* variable shift costs */
  1,		/* constant shift costs */
  3,		/* cost of starting a multiply */
  0,		/* cost of multiply per each bit set */
  18,		/* cost of a divide/mod */
  2,		/* cost of movsx */
  2,		/* cost of movzx */
  8,		/* "large" insn */
  4,		/* MOVE_RATIO */
  3,		/* cost for loading QImode using movzbl */
  {4, 5, 4},	/* cost of loading integer registers
		   in QImode, HImode and SImode.
		   Relative to reg-reg move (2).  */
  {2, 3, 2},	/* cost of storing integer registers */
  4,		/* cost of reg,reg fld/fst */
  {6, 6, 6},	/* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {4, 4, 4},	/* cost of storing fp registers
		   in SFmode, DFmode and XFmode */
  2,		/* cost of moving MMX register */
  {2, 2},	/* cost of loading MMX registers
		   in SImode and DImode */
  {2, 2},	/* cost of storing MMX registers
		   in SImode and DImode */
  2,		/* cost of moving SSE register */
  {2, 2, 8},	/* cost of loading SSE registers
		   in SImode, DImode and TImode */
  {2, 2, 8},	/* cost of storing SSE registers
		   in SImode, DImode and TImode */
  6		/* MMX or SSE register to integer */
};

static const
struct processor_costs athlon_cost = {
  1,		/* cost of an add instruction */
  2,		/* cost of a lea instruction */
  1,		/* variable shift costs */
  1,		/* constant shift costs */
  5,		/* cost of starting a multiply */
  0,		/* cost of multiply per each bit set */
  42,		/* cost of a divide/mod */
  1,		/* cost of movsx */
  1,		/* cost of movzx */
  8,		/* "large" insn */
  9,		/* MOVE_RATIO */
  4,		/* cost for loading QImode using movzbl */
  {4, 5, 4},	/* cost of loading integer registers
		   in QImode, HImode and SImode.
		   Relative to reg-reg move (2).  */
  {2, 3, 2},	/* cost of storing integer registers */
  4,		/* cost of reg,reg fld/fst */
  {6, 6, 20},	/* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {4, 4, 16},	/* cost of storing fp registers
		   in SFmode, DFmode and XFmode */
  2,		/* cost of moving MMX register */
  {2, 2},	/* cost of loading MMX registers
		   in SImode and DImode */
  {2, 2},	/* cost of storing MMX registers
		   in SImode and DImode */
  2,		/* cost of moving SSE register */
  {2, 2, 8},	/* cost of loading SSE registers
		   in SImode, DImode and TImode */
  {2, 2, 8},	/* cost of storing SSE registers
		   in SImode, DImode and TImode */
  6		/* MMX or SSE register to integer */
};

static const
struct processor_costs pentium4_cost = {
  1,		/* cost of an add instruction */
  1,		/* cost of a lea instruction */
  8,		/* variable shift costs */
  8,		/* constant shift costs */
  30,		/* cost of starting a multiply */
  0,		/* cost of multiply per each bit set */
  112,		/* cost of a divide/mod */
  1,		/* cost of movsx */
  1,		/* cost of movzx */
  16,		/* "large" insn */
  6,		/* MOVE_RATIO */
  2,		/* cost for loading QImode using movzbl */
  {4, 5, 4},	/* cost of loading integer registers
		   in QImode, HImode and SImode.
		   Relative to reg-reg move (2).  */
  {2, 3, 2},	/* cost of storing integer registers */
  2,		/* cost of reg,reg fld/fst */
  {2, 2, 6},	/* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {4, 4, 6},	/* cost of storing fp registers
		   in SFmode, DFmode and XFmode */
  2,		/* cost of moving MMX register */
  {2, 2},	/* cost of loading MMX registers
		   in SImode and DImode */
  {2, 2},	/* cost of storing MMX registers
		   in SImode and DImode */
  12,		/* cost of moving SSE register */
  {12, 12, 12},	/* cost of loading SSE registers
		   in SImode, DImode and TImode */
  {2, 2, 8},	/* cost of storing SSE registers
		   in SImode, DImode and TImode */
  10,		/* MMX or SSE register to integer */
};

const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)
#define m_PENT4  (1<<PROCESSOR_PENTIUM4)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_3dnow_a = m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
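
/* Illustrative note (not part of the original source): each of the tuning
   flags above is a bitmask indexed by processor, so a feature is tested by
   anding the flag with the bit for the active CPU, e.g.

     if (x86_use_leave & (1 << ix86_cpu))
       ... prefer the "leave" instruction in the epilogue ...

   mirroring how x86_3dnow_a is tested against ix86_arch later in
   override_options.  */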

/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))

static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
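
/* Illustrative note (not part of the original source): REGNO_REG_CLASS in
   i386.h simply indexes this table, so for example REGNO_REG_CLASS (1)
   yields DREG (%edx) and REGNO_REG_CLASS (7) yields NON_Q_REGS (%esp),
   which has no QImode subregisters.  */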

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
						1 /*RDX*/, 2 /*RCX*/,
						FIRST_REX_INT_REG /*R8 */,
						FIRST_REX_INT_REG + 1 /*R9 */};
static int x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)

   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.

   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)

   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.

   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
  */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
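
/* Illustrative note (not part of the original source): assuming the usual
   x86-64 ABI values REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8, the varargs
   register save area works out to 6*8 + 8*16 = 176 bytes.  */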

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]           \
			 )
   [va_arg registers]   (
			 > to_allocate	      <- FRAME_POINTER
   [frame]		(
			 )
   [padding2]           /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};

/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;

/* which cpu are we scheduling for */
enum processor_type ix86_cpu;

/* which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;

static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					  rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_free_machine_status PARAMS ((struct function *));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
					 tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
					    tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					   rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static int ix86_save_reg PARAMS ((int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
const struct attribute_spec ix86_attribute_table[];
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));

#ifdef DO_GLOBAL_CTORS_BODY
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif

/* Register class used for passing a given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class, except
   that gcc will use an SFmode or DFmode move instead of DImode to avoid
   reformatting penalties.

   Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
{
  X86_64_NO_CLASS,
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
  X86_64_SSE_CLASS,
  X86_64_SSESF_CLASS,
  X86_64_SSEDF_CLASS,
  X86_64_SSEUP_CLASS,
  X86_64_X87_CLASS,
  X86_64_X87UP_CLASS,
  X86_64_MEMORY_CLASS
};
const char * const x86_64_reg_class_name[] =
   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
static void ix86_osf_output_function_prologue PARAMS ((FILE *,
						       HOST_WIDE_INT));
#  undef TARGET_ASM_FUNCTION_PROLOGUE
#  define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder

struct gcc_target targetm = TARGET_INITIALIZER;

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
      const int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
      {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
    };

  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386},
      {"i486", PROCESSOR_I486},
      {"i586", PROCESSOR_PENTIUM},
      {"pentium", PROCESSOR_PENTIUM},
      {"i686", PROCESSOR_PENTIUMPRO},
      {"pentiumpro", PROCESSOR_PENTIUMPRO},
      {"k6", PROCESSOR_K6},
      {"athlon", PROCESSOR_ATHLON},
      {"pentium4", PROCESSOR_PENTIUM4},
    };

  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  ix86_arch = PROCESSOR_I386;
  ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  if (ix86_arch_string != 0)
    {
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
	  {
	    ix86_arch = processor_alias_table[i].processor;
	    /* Default cpu tuning to the architecture.  */
	    ix86_cpu = ix86_arch;
	    break;
	  }

      if (i == pta_size)
	error ("bad value (%s) for -march= switch", ix86_arch_string);
    }

  if (ix86_cpu_string != 0)
    {
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
	  {
	    ix86_cpu = processor_alias_table[i].processor;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
    }

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;
  free_machine_status = ix86_free_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else
    if (TARGET_64BIT)
      ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
#define abs(n) (n < 0 ? -n : n)
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_cpu].align_loop;
      align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_cpu].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_cpu].align_func;
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 64 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 3 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
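
  /* Illustrative note (not part of the original source): the option value
     is the log2 of the boundary in bytes, so -mpreferred-stack-boundary=4
     gives (1 << 4) * BITS_PER_UNIT = 16 bytes = 128 bits, matching the
     default used when not optimizing for size.  */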

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");
      /* Enable by default the SSE and MMX builtins.  */
      target_flags |= MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE;
    }

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    target_flags |= MASK_MMX;

  /* If it has 3DNow! it also has MMX, so MMX is also turned on by -m3dnow.  */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
	 extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
	target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & CPUMASK)
      && !(target_flags & MASK_NO_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
}

void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
  if (TARGET_64BIT && optimize >= 1)
    flag_omit_frame_pointer = 1;
  if (TARGET_64BIT)
    {
      flag_pcc_struct_return = 0;
      flag_asynchronous_unwind_tables = 1;
    }
}

/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",  0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl",    0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",  1, 1, false, true,  true,  ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { NULL,        0, 0, false, false, false, NULL }
};
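
/* Illustrative note (not part of the original source): these attributes are
   written on function types, e.g.

     int __attribute__ ((stdcall)) f (int a, int b);     callee pops args
     int __attribute__ ((regparm (3))) g (int a, int b); args in registers

   and are validated by the handlers below.  */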

/* Handle a "cdecl" or "stdcall" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args ATTRIBUTE_UNUSED;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  if (TARGET_64BIT)
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle a "regparm" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning ("`%s' attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning ("argument to `%s' attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}

#if defined (OSF_OS) || defined (TARGET_OSF1ELF)

/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int: how many units of
   temporary storage to allocate.

   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.

   We override it here to allow for the new profiling code to go before
   the prologue and the old mcount code to go after the prologue (and
   after %ebx has been set up for ELF shared library support).  */

static void
ix86_osf_output_function_prologue (file, size)
     FILE *file;
     HOST_WIDE_INT size;
{
  const char *prefix = "";
  const char *const lprefix = LPREFIX;
  int labelno = profile_label_no;

#ifdef OSF_OS

  if (TARGET_UNDERSCORES)
    prefix = "_";

  if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic && !HALF_PIC_P ())
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else if (HALF_PIC_P ())
	{
	  rtx symref;

	  HALF_PIC_EXTERNAL ("_mcount_ptr");
	  symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
						     "_mcount_ptr"));

	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
		   XSTR (symref, 0));
	  fprintf (file, "\tcall *(%%eax)\n");
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }

#else  /* !OSF_OS */

  if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic)
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }
#endif /* !OSF_OS */

  function_prologue (file, size);
}

#endif /* OSF_OS || TARGET_OSF1ELF */

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}

/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
    {
      /* Stdcall functions will pop the stack if not variable args.  */
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
	rtd = 1;

      if (rtd
	  && (TYPE_ARG_TYPES (funtype) == NULL_TREE
	      || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		  == void_type_node)))
	return size;
    }

  /* Lose any fake structure return argument.  */
  if (aggregate_value_p (TREE_TYPE (funtype))
      && !TARGET_64BIT)
    return GET_MODE_SIZE (Pmode);

  return 0;
}
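
/* Illustrative note (not part of the original source): for
   int __attribute__ ((stdcall)) f (int a, int b);  the function above
   returns 8 (two 4-byte stack arguments popped by "ret $8"), while for a
   plain cdecl function it returns 0 and the caller pops the args.  */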

/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */
bool
ix86_function_arg_regno_p (regno)
     int regno;
{
  int i;
  if (!TARGET_64BIT)
    return regno < REGPARM_MAX || (TARGET_SSE && SSE_REGNO_P (regno));
  if (SSE_REGNO_P (regno) && TARGET_SSE)
    return true;
  /* RAX is used as hidden argument to va_arg functions.  */
  if (!regno)
    return true;
  for (i = 0; i < REGPARM_MAX; i++)
    if (regno == x86_64_int_parameter_registers[i])
      return true;
  return false;
}

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  cum->sse_nregs = SSE_REGPARM_MAX;
  if (fntype && !TARGET_64BIT)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }
  cum->maybe_vaarg = false;

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers.  */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    {
	      if (!TARGET_64BIT)
		cum->nregs = 0;
	      cum->maybe_vaarg = true;
	    }
	}
    }
  if ((!fntype && !libname)
      || (fntype && !TYPE_ARG_TYPES (fntype)))
    cum->maybe_vaarg = 1;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}

/* x86-64 register passing implementation.  See the x86-64 ABI for details.
   The goal of this code is to classify each 8 bytes of the incoming argument
   by register class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (class1, class2)
     enum x86_64_reg_class class1, class2;
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
      || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
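
/* Illustrative worked example (not part of the original source): for
   struct { int i; double d; }  classify_argument below classifies the
   first 8-byte word as X86_64_INTEGERSI_CLASS (the int plus padding,
   merged with NO_CLASS by the rules above) and the second word as
   X86_64_SSEDF_CLASS, so the struct travels in one integer register and
   one SSE register.  */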

/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies the
   offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (mode, type, classes, bit_offset)
     enum machine_mode mode;
     tree type;
     enum x86_64_reg_class classes[MAX_CLASSES];
     int bit_offset;
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
      if (bytes > 16)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signal memory class, so handle it as a special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      if (TREE_CODE (type) == RECORD_TYPE)
	{
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = int_bit_position (field) / 8 / 8;
			   i < (int_bit_position (field)
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i] =
			  merge_classes (X86_64_INTEGER_CLASS,
					 classes[i]);
		    }
		  else
		    {
		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					       TREE_TYPE (field), subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      for (i = 0; i < num; i++)
			{
			  int pos =
			    (int_bit_position (field) + bit_offset) / 8 / 8;
			  classes[i + pos] =
			    merge_classes (subclasses[i], classes[i + pos]);
			}
		    }
		}
	    }
	}
      /* Arrays are handled as small records.  */
      else if (TREE_CODE (type) == ARRAY_TYPE)
	{
	  int num;
	  num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				   TREE_TYPE (type), subclasses, bit_offset);
	  if (!num)
	    return 0;

	  /* The partial classes are now full classes.  */
	  if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	    subclasses[0] = X86_64_SSE_CLASS;
	  if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
	    subclasses[0] = X86_64_INTEGER_CLASS;

	  for (i = 0; i < words; i++)
	    classes[i] = subclasses[i % num];
	}
      /* Unions are similar to RECORD_TYPE but offset is always 0.  */
      else if (TREE_CODE (type) == UNION_TYPE)
	{
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;
		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	}
      else
	abort ();

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should always be preceded by
	     X86_64_SSE_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
	    classes[i] = X86_64_SSE_CLASS;

	  /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
	    classes[i] = X86_64_SSE_CLASS;
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries
     with the exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* Classification of atomic types.  */
  switch (mode)
    {
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      return 1;
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case CTImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      classes[2] = classes[3] = X86_64_INTEGER_CLASS;
      return 4;
    case SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case TFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TCmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      classes[2] = X86_64_X87_CLASS;
      classes[3] = X86_64_X87UP_CLASS;
      return 4;
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
      return 0;
    default:
      abort ();
    }
}
1691 /* Examine the argument and set the number of registers required in each
1692 class. Return 0 iff the parameter should be passed in memory. */
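/* A worked example (illustrative only): for "struct { long a; double b; }"
   classify_argument yields { X86_64_INTEGER_CLASS, X86_64_SSEDF_CLASS },
   so this function sets *int_nregs = 1 and *sse_nregs = 1 and returns
   nonzero; the struct is split between one GP and one SSE register.  */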
1693 static int
1694 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1695 enum machine_mode mode;
1696 tree type;
1697 int *int_nregs, *sse_nregs;
1698 int in_return;
1700 enum x86_64_reg_class class[MAX_CLASSES];
1701 int n = classify_argument (mode, type, class, 0);
1703 *int_nregs = 0;
1704 *sse_nregs = 0;
1705 if (!n)
1706 return 0;
1707 for (n--; n >= 0; n--)
1708 switch (class[n])
1710 case X86_64_INTEGER_CLASS:
1711 case X86_64_INTEGERSI_CLASS:
1712 (*int_nregs)++;
1713 break;
1714 case X86_64_SSE_CLASS:
1715 case X86_64_SSESF_CLASS:
1716 case X86_64_SSEDF_CLASS:
1717 (*sse_nregs)++;
1718 break;
1719 case X86_64_NO_CLASS:
1720 case X86_64_SSEUP_CLASS:
1721 break;
1722 case X86_64_X87_CLASS:
1723 case X86_64_X87UP_CLASS:
1724 if (!in_return)
1725 return 0;
1726 break;
1727 case X86_64_MEMORY_CLASS:
1728 abort ();
1730 return 1;
1732 /* Construct a container for the argument used by the GCC interface. See
1733 FUNCTION_ARG for the detailed description. */
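/* An illustrative sketch of the result (not from the original sources):
   for "struct { long a; double b; }" passed as the first argument, the
   container is roughly

       (parallel [(expr_list (reg:DI di) (const_int 0))
                  (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the first eightbyte travels in a general register and the second
   in an SSE register, each tagged with its byte offset into the value.  */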
1734 static rtx
1735 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1736 enum machine_mode mode;
1737 tree type;
1738 int in_return;
1739 int nintregs, nsseregs;
1740 int *intreg, sse_regno;
1742 enum machine_mode tmpmode;
1743 int bytes =
1744 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1745 enum x86_64_reg_class class[MAX_CLASSES];
1746 int n;
1747 int i;
1748 int nexps = 0;
1749 int needed_sseregs, needed_intregs;
1750 rtx exp[MAX_CLASSES];
1751 rtx ret;
1753 n = classify_argument (mode, type, class, 0);
1754 if (TARGET_DEBUG_ARG)
1756 if (!n)
1757 fprintf (stderr, "Memory class\n");
1758 else
1760 fprintf (stderr, "Classes:");
1761 for (i = 0; i < n; i++)
1763 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1765 fprintf (stderr, "\n");
1768 if (!n)
1769 return NULL;
1770 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1771 return NULL;
1772 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1773 return NULL;
1775 /* First construct simple cases. Avoid SCmode, since we want to use
1776 a single register to pass this type. */
1777 if (n == 1 && mode != SCmode)
1778 switch (class[0])
1780 case X86_64_INTEGER_CLASS:
1781 case X86_64_INTEGERSI_CLASS:
1782 return gen_rtx_REG (mode, intreg[0]);
1783 case X86_64_SSE_CLASS:
1784 case X86_64_SSESF_CLASS:
1785 case X86_64_SSEDF_CLASS:
1786 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1787 case X86_64_X87_CLASS:
1788 return gen_rtx_REG (mode, FIRST_STACK_REG);
1789 case X86_64_NO_CLASS:
1790 /* Zero-sized array, struct or class. */
1791 return NULL;
1792 default:
1793 abort ();
1795 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1796 return gen_rtx_REG (TImode, SSE_REGNO (sse_regno));
1797 if (n == 2
1798 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1799 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1800 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1801 && class[1] == X86_64_INTEGER_CLASS
1802 && (mode == CDImode || mode == TImode)
1803 && intreg[0] + 1 == intreg[1])
1804 return gen_rtx_REG (mode, intreg[0]);
1805 if (n == 4
1806 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1807 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1808 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1810 /* Otherwise figure out the entries of the PARALLEL. */
1811 for (i = 0; i < n; i++)
1813 switch (class[i])
1815 case X86_64_NO_CLASS:
1816 break;
1817 case X86_64_INTEGER_CLASS:
1818 case X86_64_INTEGERSI_CLASS:
1819 /* Merge TImodes on aligned occasions here too. */
1820 if (i * 8 + 8 > bytes)
1821 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1822 else if (class[i] == X86_64_INTEGERSI_CLASS)
1823 tmpmode = SImode;
1824 else
1825 tmpmode = DImode;
1826 /* We may have requested a size (e.g. 24 bits) that no integer mode exists for. Use DImode. */
1827 if (tmpmode == BLKmode)
1828 tmpmode = DImode;
1829 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1830 gen_rtx_REG (tmpmode, *intreg),
1831 GEN_INT (i*8));
1832 intreg++;
1833 break;
1834 case X86_64_SSESF_CLASS:
1835 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1836 gen_rtx_REG (SFmode,
1837 SSE_REGNO (sse_regno)),
1838 GEN_INT (i*8));
1839 sse_regno++;
1840 break;
1841 case X86_64_SSEDF_CLASS:
1842 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1843 gen_rtx_REG (DFmode,
1844 SSE_REGNO (sse_regno)),
1845 GEN_INT (i*8));
1846 sse_regno++;
1847 break;
1848 case X86_64_SSE_CLASS:
1849 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
1850 tmpmode = TImode, i++;
1851 else
1852 tmpmode = DImode;
1853 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1854 gen_rtx_REG (tmpmode,
1855 SSE_REGNO (sse_regno)),
1856 GEN_INT (i*8));
1857 sse_regno++;
1858 break;
1859 default:
1860 abort ();
1863 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
1864 for (i = 0; i < nexps; i++)
1865 XVECEXP (ret, 0, i) = exp [i];
1866 return ret;
1869 /* Update the data in CUM to advance over an argument
1870 of mode MODE and data type TYPE.
1871 (TYPE is null for libcalls where that information may not be available.) */
1873 void
1874 function_arg_advance (cum, mode, type, named)
1875 CUMULATIVE_ARGS *cum; /* current arg information */
1876 enum machine_mode mode; /* current arg mode */
1877 tree type; /* type of the argument or 0 if lib support */
1878 int named; /* whether or not the argument was named */
1880 int bytes =
1881 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1882 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1884 if (TARGET_DEBUG_ARG)
1885 fprintf (stderr,
1886 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
1887 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1888 if (TARGET_64BIT)
1890 int int_nregs, sse_nregs;
1891 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
1892 cum->words += words;
1893 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
1895 cum->nregs -= int_nregs;
1896 cum->sse_nregs -= sse_nregs;
1897 cum->regno += int_nregs;
1898 cum->sse_regno += sse_nregs;
1900 else
1901 cum->words += words;
1903 else
1905 if (TARGET_SSE && mode == TImode)
1907 cum->sse_words += words;
1908 cum->sse_nregs -= 1;
1909 cum->sse_regno += 1;
1910 if (cum->sse_nregs <= 0)
1912 cum->sse_nregs = 0;
1913 cum->sse_regno = 0;
1916 else
1918 cum->words += words;
1919 cum->nregs -= words;
1920 cum->regno += words;
1922 if (cum->nregs <= 0)
1924 cum->nregs = 0;
1925 cum->regno = 0;
1929 return;
1932 /* Define where to put the arguments to a function.
1933 Value is zero to push the argument on the stack,
1934 or a hard register in which to store the argument.
1936 MODE is the argument's machine mode.
1937 TYPE is the data type of the argument (as a tree).
1938 This is null for libcalls where that information may
1939 not be available.
1940 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1941 the preceding args and about the function being called.
1942 NAMED is nonzero if this argument is a named parameter
1943 (otherwise it is an extra parameter matching an ellipsis). */
1945 struct rtx_def *
1946 function_arg (cum, mode, type, named)
1947 CUMULATIVE_ARGS *cum; /* current arg information */
1948 enum machine_mode mode; /* current arg mode */
1949 tree type; /* type of the argument or 0 if lib support */
1950 int named; /* != 0 for normal args, == 0 for ... args */
1952 rtx ret = NULL_RTX;
1953 int bytes =
1954 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1955 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1957 /* Handle a hidden AL argument containing the number of SSE registers
1958 used by varargs x86-64 functions. For the i386 ABI just return
1959 constm1_rtx to avoid any AL settings. */
1960 if (mode == VOIDmode)
1962 if (TARGET_64BIT)
1963 return GEN_INT (cum->maybe_vaarg
1964 ? (cum->sse_nregs < 0
1965 ? SSE_REGPARM_MAX
1966 : cum->sse_regno)
1967 : -1);
1968 else
1969 return constm1_rtx;
1971 if (TARGET_64BIT)
1972 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
1973 &x86_64_int_parameter_registers [cum->regno],
1974 cum->sse_regno);
1975 else
1976 switch (mode)
1978 /* For now, pass fp/complex values on the stack. */
1979 default:
1980 break;
1982 case BLKmode:
1983 case DImode:
1984 case SImode:
1985 case HImode:
1986 case QImode:
1987 if (words <= cum->nregs)
1988 ret = gen_rtx_REG (mode, cum->regno);
1989 break;
1990 case TImode:
1991 if (cum->sse_nregs)
1992 ret = gen_rtx_REG (mode, cum->sse_regno);
1993 break;
1996 if (TARGET_DEBUG_ARG)
1998 fprintf (stderr,
1999 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
2000 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2002 if (ret)
2003 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
2004 else
2005 fprintf (stderr, ", stack");
2007 fprintf (stderr, " )\n");
2010 return ret;
2013 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2014 and type. */
2017 ix86_function_arg_boundary (mode, type)
2018 enum machine_mode mode;
2019 tree type;
2021 int align;
2022 if (!TARGET_64BIT)
2023 return PARM_BOUNDARY;
2024 if (type)
2025 align = TYPE_ALIGN (type);
2026 else
2027 align = GET_MODE_ALIGNMENT (mode);
2028 if (align < PARM_BOUNDARY)
2029 align = PARM_BOUNDARY;
2030 if (align > 128)
2031 align = 128;
2032 return align;
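/* For example (a sketch, assuming PARM_BOUNDARY is 64 on the 64-bit
   target): a DImode scalar yields 64, a struct with TYPE_ALIGN of 128
   yields 128, and a 256-bit-aligned type is still capped at 128.  */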
2035 /* Return true if REGNO is a possible register number for a function value. */
2036 bool
2037 ix86_function_value_regno_p (regno)
2038 int regno;
2040 if (!TARGET_64BIT)
2042 return ((regno) == 0
2043 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2044 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2046 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2047 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2048 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2051 /* Define how to find the value returned by a function.
2052 VALTYPE is the data type of the value (as a tree).
2053 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2054 otherwise, FUNC is 0. */
2056 ix86_function_value (valtype)
2057 tree valtype;
2059 if (TARGET_64BIT)
2061 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2062 REGPARM_MAX, SSE_REGPARM_MAX,
2063 x86_64_int_return_registers, 0);
2064 /* For zero-sized structures, construct_container returns NULL, but we need
2065 to keep the rest of the compiler happy by returning a meaningful value. */
2066 if (!ret)
2067 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2068 return ret;
2070 else
2071 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2074 /* Return nonzero iff TYPE is returned in memory. */
2076 ix86_return_in_memory (type)
2077 tree type;
2079 int needed_intregs, needed_sseregs;
2080 if (TARGET_64BIT)
2082 return !examine_argument (TYPE_MODE (type), type, 1,
2083 &needed_intregs, &needed_sseregs);
2085 else
2087 if (TYPE_MODE (type) == BLKmode
2088 || (VECTOR_MODE_P (TYPE_MODE (type))
2089 && int_size_in_bytes (type) == 8)
2090 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2091 && TYPE_MODE (type) != TFmode
2092 && !VECTOR_MODE_P (TYPE_MODE (type))))
2093 return 1;
2094 return 0;
2098 /* Define how to find the value returned by a library function
2099 assuming the value has mode MODE. */
2101 ix86_libcall_value (mode)
2102 enum machine_mode mode;
2104 if (TARGET_64BIT)
2106 switch (mode)
2108 case SFmode:
2109 case SCmode:
2110 case DFmode:
2111 case DCmode:
2112 return gen_rtx_REG (mode, FIRST_SSE_REG);
2113 case TFmode:
2114 case TCmode:
2115 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2116 default:
2117 return gen_rtx_REG (mode, 0);
2120 else
2121 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2124 /* Create the va_list data type. */
2126 tree
2127 ix86_build_va_list ()
2129 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2131 /* For i386 we use a plain pointer to the argument area. */
2132 if (!TARGET_64BIT)
2133 return build_pointer_type (char_type_node);
2135 record = make_lang_type (RECORD_TYPE);
2136 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2138 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2139 unsigned_type_node);
2140 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2141 unsigned_type_node);
2142 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2143 ptr_type_node);
2144 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2145 ptr_type_node);
2147 DECL_FIELD_CONTEXT (f_gpr) = record;
2148 DECL_FIELD_CONTEXT (f_fpr) = record;
2149 DECL_FIELD_CONTEXT (f_ovf) = record;
2150 DECL_FIELD_CONTEXT (f_sav) = record;
2152 TREE_CHAIN (record) = type_decl;
2153 TYPE_NAME (record) = type_decl;
2154 TYPE_FIELDS (record) = f_gpr;
2155 TREE_CHAIN (f_gpr) = f_fpr;
2156 TREE_CHAIN (f_fpr) = f_ovf;
2157 TREE_CHAIN (f_ovf) = f_sav;
2159 layout_type (record);
2161 /* The correct type is an array type of one element. */
2162 return build_array_type (record, build_index_type (size_zero_node));
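/* The record built above corresponds to the C-level declaration required
   by the x86-64 ABI, shown here as a reference sketch:

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag, va_list[1];
*/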
2165 /* Perform any actions needed for a function that is receiving a
2166 variable number of arguments.
2168 CUM is as above.
2170 MODE and TYPE are the mode and type of the current parameter.
2172 PRETEND_SIZE is a variable that should be set to the amount of stack
2173 that must be pushed by the prologue to pretend that our caller pushed it.
2176 Normally, this macro will push all remaining incoming registers on the
2177 stack and set PRETEND_SIZE to the length of the registers pushed. */
2179 void
2180 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2181 CUMULATIVE_ARGS *cum;
2182 enum machine_mode mode;
2183 tree type;
2184 int *pretend_size ATTRIBUTE_UNUSED;
2185 int no_rtl;
2188 CUMULATIVE_ARGS next_cum;
2189 rtx save_area = NULL_RTX, mem;
2190 rtx label;
2191 rtx label_ref;
2192 rtx tmp_reg;
2193 rtx nsse_reg;
2194 int set;
2195 tree fntype;
2196 int stdarg_p;
2197 int i;
2199 if (!TARGET_64BIT)
2200 return;
2202 /* Indicate that we need to allocate space on the stack for the varargs save area. */
2203 ix86_save_varrargs_registers = 1;
2205 fntype = TREE_TYPE (current_function_decl);
2206 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2207 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2208 != void_type_node));
2210 /* For varargs, we do not want to skip the dummy va_dcl argument.
2211 For stdargs, we do want to skip the last named argument. */
2212 next_cum = *cum;
2213 if (stdarg_p)
2214 function_arg_advance (&next_cum, mode, type, 1);
2216 if (!no_rtl)
2217 save_area = frame_pointer_rtx;
2219 set = get_varargs_alias_set ();
2221 for (i = next_cum.regno; i < ix86_regparm; i++)
2223 mem = gen_rtx_MEM (Pmode,
2224 plus_constant (save_area, i * UNITS_PER_WORD));
2225 set_mem_alias_set (mem, set);
2226 emit_move_insn (mem, gen_rtx_REG (Pmode,
2227 x86_64_int_parameter_registers[i]));
2230 if (next_cum.sse_nregs)
2232 /* Now emit code to save SSE registers. The AX parameter contains the
2233 number of SSE parameter registers used to call this function. We use
2234 the sse_prologue_save insn template, which produces a computed jump
2235 across the SSE saves. We need some preparation work to get this working. */
2237 label = gen_label_rtx ();
2238 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2240 /* Compute the address to jump to:
2241 label - eax*4 + nnamed_sse_arguments*4. */
2242 tmp_reg = gen_reg_rtx (Pmode);
2243 nsse_reg = gen_reg_rtx (Pmode);
2244 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2245 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2246 gen_rtx_MULT (Pmode, nsse_reg,
2247 GEN_INT (4))));
2248 if (next_cum.sse_regno)
2249 emit_move_insn
2250 (nsse_reg,
2251 gen_rtx_CONST (DImode,
2252 gen_rtx_PLUS (DImode,
2253 label_ref,
2254 GEN_INT (next_cum.sse_regno * 4))));
2255 else
2256 emit_move_insn (nsse_reg, label_ref);
2257 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2259 /* Compute the address of the memory block we save into. We always use a
2260 pointer pointing 127 bytes after the first byte to store; this is needed
2261 to keep each instruction's size limited to 4 bytes. */
2262 tmp_reg = gen_reg_rtx (Pmode);
2263 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2264 plus_constant (save_area,
2265 8 * REGPARM_MAX + 127)));
2266 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2267 set_mem_alias_set (mem, set);
2268 set_mem_align (mem, BITS_PER_WORD);
2270 /* And finally do the dirty job! */
2271 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2272 GEN_INT (next_cum.sse_regno), label));
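/* Sketch of the register save area built above, assuming the usual x86-64
   values REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8:

       bytes   0 ..  47   integer argument registers, 8 bytes each
       bytes  48 .. 175   SSE argument registers, 16 bytes each

   The computed jump emitted above skips the stores for SSE registers
   that the caller did not actually use, as reported in AL.  */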
2277 /* Implement va_start. */
2279 void
2280 ix86_va_start (stdarg_p, valist, nextarg)
2281 int stdarg_p;
2282 tree valist;
2283 rtx nextarg;
2285 HOST_WIDE_INT words, n_gpr, n_fpr;
2286 tree f_gpr, f_fpr, f_ovf, f_sav;
2287 tree gpr, fpr, ovf, sav, t;
2289 /* Only the 64-bit target needs something special. */
2290 if (!TARGET_64BIT)
2292 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2293 return;
2296 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2297 f_fpr = TREE_CHAIN (f_gpr);
2298 f_ovf = TREE_CHAIN (f_fpr);
2299 f_sav = TREE_CHAIN (f_ovf);
2301 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2302 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2303 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2304 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2305 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2307 /* Count number of gp and fp argument registers used. */
2308 words = current_function_args_info.words;
2309 n_gpr = current_function_args_info.regno;
2310 n_fpr = current_function_args_info.sse_regno;
2312 if (TARGET_DEBUG_ARG)
2313 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2314 (int)words, (int)n_gpr, (int)n_fpr);
2316 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2317 build_int_2 (n_gpr * 8, 0));
2318 TREE_SIDE_EFFECTS (t) = 1;
2319 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2321 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2322 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2323 TREE_SIDE_EFFECTS (t) = 1;
2324 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2326 /* Find the overflow area. */
2327 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2328 if (words != 0)
2329 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2330 build_int_2 (words * UNITS_PER_WORD, 0));
2331 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2332 TREE_SIDE_EFFECTS (t) = 1;
2333 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2335 /* Find the register save area.
2336 The function prologue saves it right above the stack frame. */
2337 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2338 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2339 TREE_SIDE_EFFECTS (t) = 1;
2340 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
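/* The net effect of the expansion above, as C pseudo-code (a sketch,
   assuming REGPARM_MAX == 6 as on x86-64):

       ap->gp_offset = n_gpr * 8;
       ap->fp_offset = 6 * 8 + n_fpr * 16;
       ap->overflow_arg_area = incoming_args + words * UNITS_PER_WORD;
       ap->reg_save_area = frame_pointer;
*/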
2343 /* Implement va_arg. */
2345 ix86_va_arg (valist, type)
2346 tree valist, type;
2348 static int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2349 tree f_gpr, f_fpr, f_ovf, f_sav;
2350 tree gpr, fpr, ovf, sav, t;
2351 int size, rsize;
2352 rtx lab_false, lab_over = NULL_RTX;
2353 rtx addr_rtx, r;
2354 rtx container;
2356 /* Only the 64-bit target needs something special. */
2357 if (!TARGET_64BIT)
2359 return std_expand_builtin_va_arg (valist, type);
2362 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2363 f_fpr = TREE_CHAIN (f_gpr);
2364 f_ovf = TREE_CHAIN (f_fpr);
2365 f_sav = TREE_CHAIN (f_ovf);
2367 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2368 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2369 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2370 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2371 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2373 size = int_size_in_bytes (type);
2374 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2376 container = construct_container (TYPE_MODE (type), type, 0,
2377 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2379 /* Pull the value out of the saved registers ... */
2382 addr_rtx = gen_reg_rtx (Pmode);
2384 if (container)
2386 rtx int_addr_rtx, sse_addr_rtx;
2387 int needed_intregs, needed_sseregs;
2388 int need_temp;
2390 lab_over = gen_label_rtx ();
2391 lab_false = gen_label_rtx ();
2393 examine_argument (TYPE_MODE (type), type, 0,
2394 &needed_intregs, &needed_sseregs);
2397 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2398 || TYPE_ALIGN (type) > 128);
2400 /* When we are passing a structure, verify that it is a consecutive block
2401 on the register save area. If not, we need to do moves. */
2402 if (!need_temp && !REG_P (container))
2404 /* Verify that all registers are strictly consecutive. */
2405 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2407 int i;
2409 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2411 rtx slot = XVECEXP (container, 0, i);
2412 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int)i
2413 || INTVAL (XEXP (slot, 1)) != i * 16)
2414 need_temp = 1;
2417 else
2419 int i;
2421 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2423 rtx slot = XVECEXP (container, 0, i);
2424 if (REGNO (XEXP (slot, 0)) != (unsigned int)i
2425 || INTVAL (XEXP (slot, 1)) != i * 8)
2426 need_temp = 1;
2430 if (!need_temp)
2432 int_addr_rtx = addr_rtx;
2433 sse_addr_rtx = addr_rtx;
2435 else
2437 int_addr_rtx = gen_reg_rtx (Pmode);
2438 sse_addr_rtx = gen_reg_rtx (Pmode);
2440 /* First ensure that we fit completely in registers. */
2441 if (needed_intregs)
2443 emit_cmp_and_jump_insns (expand_expr
2444 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2445 GEN_INT ((REGPARM_MAX - needed_intregs +
2446 1) * 8), GE, const1_rtx, SImode,
2447 1, lab_false);
2449 if (needed_sseregs)
2451 emit_cmp_and_jump_insns (expand_expr
2452 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2453 GEN_INT ((SSE_REGPARM_MAX -
2454 needed_sseregs + 1) * 16 +
2455 REGPARM_MAX * 8), GE, const1_rtx,
2456 SImode, 1, lab_false);
2459 /* Compute index to start of area used for integer regs. */
2460 if (needed_intregs)
2462 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2463 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2464 if (r != int_addr_rtx)
2465 emit_move_insn (int_addr_rtx, r);
2467 if (needed_sseregs)
2469 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2470 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2471 if (r != sse_addr_rtx)
2472 emit_move_insn (sse_addr_rtx, r);
2474 if (need_temp)
2476 int i;
2477 rtx mem;
2479 /* Never use the memory itself, as it has the alias set. */
2480 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2481 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2482 set_mem_alias_set (mem, get_varargs_alias_set ());
2483 set_mem_align (mem, BITS_PER_UNIT);
2485 for (i = 0; i < XVECLEN (container, 0); i++)
2487 rtx slot = XVECEXP (container, 0, i);
2488 rtx reg = XEXP (slot, 0);
2489 enum machine_mode mode = GET_MODE (reg);
2490 rtx src_addr;
2491 rtx src_mem;
2492 int src_offset;
2493 rtx dest_mem;
2495 if (SSE_REGNO_P (REGNO (reg)))
2497 src_addr = sse_addr_rtx;
2498 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2500 else
2502 src_addr = int_addr_rtx;
2503 src_offset = REGNO (reg) * 8;
2505 src_mem = gen_rtx_MEM (mode, src_addr);
2506 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2507 src_mem = adjust_address (src_mem, mode, src_offset);
2508 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2509 emit_move_insn (dest_mem, src_mem);
2513 if (needed_intregs)
2516 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2517 build_int_2 (needed_intregs * 8, 0));
2518 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2519 TREE_SIDE_EFFECTS (t) = 1;
2520 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2522 if (needed_sseregs)
2525 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2526 build_int_2 (needed_sseregs * 16, 0));
2527 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2528 TREE_SIDE_EFFECTS (t) = 1;
2529 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2532 emit_jump_insn (gen_jump (lab_over));
2533 emit_barrier ();
2534 emit_label (lab_false);
2537 /* ... otherwise out of the overflow area. */
2539 /* Care for on-stack alignment if needed. */
2540 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2541 t = ovf;
2542 else
2544 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2545 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2546 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2548 t = save_expr (t);
2550 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2551 if (r != addr_rtx)
2552 emit_move_insn (addr_rtx, r);
2555 build (PLUS_EXPR, TREE_TYPE (t), t,
2556 build_int_2 (rsize * UNITS_PER_WORD, 0));
2557 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2558 TREE_SIDE_EFFECTS (t) = 1;
2559 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2561 if (container)
2562 emit_label (lab_over);
2564 return addr_rtx;
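/* The code emitted above is roughly equivalent to the following C
   pseudo-code (a sketch for a type needing NEED_GP integer and NEED_SSE
   SSE registers, with REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8):

       if (ap->gp_offset <= (6 - NEED_GP) * 8
           && ap->fp_offset <= 6 * 8 + (8 - NEED_SSE) * 16)
         {
           addr = pieces copied from ap->reg_save_area;
           ap->gp_offset += NEED_GP * 8;
           ap->fp_offset += NEED_SSE * 16;
         }
       else
         {
           addr = align (ap->overflow_arg_area);
           ap->overflow_arg_area = addr + rsize * UNITS_PER_WORD;
         }
*/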
2567 /* Return nonzero if OP is a general operand representable on x86_64. */
2570 x86_64_general_operand (op, mode)
2571 rtx op;
2572 enum machine_mode mode;
2574 if (!TARGET_64BIT)
2575 return general_operand (op, mode);
2576 if (nonimmediate_operand (op, mode))
2577 return 1;
2578 return x86_64_sign_extended_value (op);
2581 /* Return nonzero if OP is a general operand representable on x86_64
2582 as either a sign-extended or zero-extended constant. */
2585 x86_64_szext_general_operand (op, mode)
2586 rtx op;
2587 enum machine_mode mode;
2589 if (!TARGET_64BIT)
2590 return general_operand (op, mode);
2591 if (nonimmediate_operand (op, mode))
2592 return 1;
2593 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2596 /* Return nonzero if OP is a nonmemory operand representable on x86_64. */
2599 x86_64_nonmemory_operand (op, mode)
2600 rtx op;
2601 enum machine_mode mode;
2603 if (!TARGET_64BIT)
2604 return nonmemory_operand (op, mode);
2605 if (register_operand (op, mode))
2606 return 1;
2607 return x86_64_sign_extended_value (op);
2610 /* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns. */
2613 x86_64_movabs_operand (op, mode)
2614 rtx op;
2615 enum machine_mode mode;
2617 if (!TARGET_64BIT || !flag_pic)
2618 return nonmemory_operand (op, mode);
2619 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2620 return 1;
2621 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2622 return 1;
2623 return 0;
2626 /* Return nonzero if OP is a nonmemory operand representable on x86_64 as either a sign-extended or zero-extended constant. */
2629 x86_64_szext_nonmemory_operand (op, mode)
2630 rtx op;
2631 enum machine_mode mode;
2633 if (!TARGET_64BIT)
2634 return nonmemory_operand (op, mode);
2635 if (register_operand (op, mode))
2636 return 1;
2637 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2640 /* Return nonzero if OP is an immediate operand representable on x86_64. */
2643 x86_64_immediate_operand (op, mode)
2644 rtx op;
2645 enum machine_mode mode;
2647 if (!TARGET_64BIT)
2648 return immediate_operand (op, mode);
2649 return x86_64_sign_extended_value (op);
2652 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero-extended value. */
2655 x86_64_zext_immediate_operand (op, mode)
2656 rtx op;
2657 enum machine_mode mode ATTRIBUTE_UNUSED;
2659 return x86_64_zero_extended_value (op);
2662 /* Return nonzero if OP is (const_int 1), else return zero. */
2665 const_int_1_operand (op, mode)
2666 rtx op;
2667 enum machine_mode mode ATTRIBUTE_UNUSED;
2669 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2672 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2673 reference and a constant. */
2676 symbolic_operand (op, mode)
2677 register rtx op;
2678 enum machine_mode mode ATTRIBUTE_UNUSED;
2680 switch (GET_CODE (op))
2682 case SYMBOL_REF:
2683 case LABEL_REF:
2684 return 1;
2686 case CONST:
2687 op = XEXP (op, 0);
2688 if (GET_CODE (op) == SYMBOL_REF
2689 || GET_CODE (op) == LABEL_REF
2690 || (GET_CODE (op) == UNSPEC
2691 && (XINT (op, 1) == 6
2692 || XINT (op, 1) == 7
2693 || XINT (op, 1) == 15)))
2694 return 1;
2695 if (GET_CODE (op) != PLUS
2696 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2697 return 0;
2699 op = XEXP (op, 0);
2700 if (GET_CODE (op) == SYMBOL_REF
2701 || GET_CODE (op) == LABEL_REF)
2702 return 1;
2703 /* Only @GOTOFF gets offsets. */
2704 if (GET_CODE (op) != UNSPEC
2705 || XINT (op, 1) != 7)
2706 return 0;
2708 op = XVECEXP (op, 0, 0);
2709 if (GET_CODE (op) == SYMBOL_REF
2710 || GET_CODE (op) == LABEL_REF)
2711 return 1;
2712 return 0;
2714 default:
2715 return 0;
2719 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2722 pic_symbolic_operand (op, mode)
2723 register rtx op;
2724 enum machine_mode mode ATTRIBUTE_UNUSED;
2726 if (GET_CODE (op) != CONST)
2727 return 0;
2728 op = XEXP (op, 0);
2729 if (TARGET_64BIT)
2731 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2732 return 1;
2734 else
2736 if (GET_CODE (op) == UNSPEC)
2737 return 1;
2738 if (GET_CODE (op) != PLUS
2739 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2740 return 0;
2741 op = XEXP (op, 0);
2742 if (GET_CODE (op) == UNSPEC)
2743 return 1;
2745 return 0;
2748 /* Return true if OP is a symbolic operand that resolves locally. */
2750 static int
2751 local_symbolic_operand (op, mode)
2752 rtx op;
2753 enum machine_mode mode ATTRIBUTE_UNUSED;
2755 if (GET_CODE (op) == LABEL_REF)
2756 return 1;
2758 if (GET_CODE (op) == CONST
2759 && GET_CODE (XEXP (op, 0)) == PLUS
2760 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2761 op = XEXP (XEXP (op, 0), 0);
2763 if (GET_CODE (op) != SYMBOL_REF)
2764 return 0;
2766 /* These we've been told are local by varasm and encode_section_info
2767 respectively. */
2768 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2769 return 1;
2771 /* There is, however, a not insubstantial body of code in the rest of
2772 the compiler that assumes it can just stick the results of
2773 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2774 /* ??? This is a hack. Should update the body of the compiler to
2775 always create a DECL and invoke ENCODE_SECTION_INFO. */
2776 if (strncmp (XSTR (op, 0), internal_label_prefix,
2777 internal_label_prefix_len) == 0)
2778 return 1;
2780 return 0;
2783 /* Test for a valid operand for a call instruction. Don't allow the
2784 arg pointer register or virtual regs since they may decay into
2785 reg + const, which the patterns can't handle. */
2788 call_insn_operand (op, mode)
2789 rtx op;
2790 enum machine_mode mode ATTRIBUTE_UNUSED;
2792 /* Disallow indirect through a virtual register. This leads to
2793 compiler aborts when trying to eliminate them. */
2794 if (GET_CODE (op) == REG
2795 && (op == arg_pointer_rtx
2796 || op == frame_pointer_rtx
2797 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
2798 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
2799 return 0;
2801 /* Disallow `call 1234'. Due to varying assembler lameness this
2802 gets either rejected or translated to `call .+1234'. */
2803 if (GET_CODE (op) == CONST_INT)
2804 return 0;
2806 /* Explicitly allow SYMBOL_REF even if pic. */
2807 if (GET_CODE (op) == SYMBOL_REF)
2808 return 1;
2810 /* Half-pic doesn't allow anything but registers and constants.
2811 We've just taken care of the latter. */
2812 if (HALF_PIC_P ())
2813 return register_operand (op, Pmode);
2815 /* Otherwise we can allow any general_operand in the address. */
2816 return general_operand (op, Pmode);
2820 constant_call_address_operand (op, mode)
2821 rtx op;
2822 enum machine_mode mode ATTRIBUTE_UNUSED;
2824 if (GET_CODE (op) == CONST
2825 && GET_CODE (XEXP (op, 0)) == PLUS
2826 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2827 op = XEXP (XEXP (op, 0), 0);
2828 return GET_CODE (op) == SYMBOL_REF;
2831 /* Match exactly zero and one. */
2834 const0_operand (op, mode)
2835 register rtx op;
2836 enum machine_mode mode;
2838 return op == CONST0_RTX (mode);
2842 const1_operand (op, mode)
2843 register rtx op;
2844 enum machine_mode mode ATTRIBUTE_UNUSED;
2846 return op == const1_rtx;
2849 /* Match 2, 4, or 8. Used for leal multiplicands. */
2852 const248_operand (op, mode)
2853 register rtx op;
2854 enum machine_mode mode ATTRIBUTE_UNUSED;
2856 return (GET_CODE (op) == CONST_INT
2857 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
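/* For example, "leal (%eax,%ebx,4), %ecx" uses scale 4; the SIB byte can
   only encode scales of 1, 2, 4 and 8, hence the constants accepted
   here.  */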
2860 /* True if this is a constant appropriate for an increment or decrement. */
2863 incdec_operand (op, mode)
2864 register rtx op;
2865 enum machine_mode mode ATTRIBUTE_UNUSED;
2867 /* On the Pentium 4, the inc and dec operations cause an extra dependency on
2868 the flags register, since the carry flag is not set. */
2869 if (TARGET_PENTIUM4 && !optimize_size)
2870 return 0;
2871 return op == const1_rtx || op == constm1_rtx;
2874 /* Return nonzero if OP is acceptable as operand of DImode shift
2875 expander. */
2878 shiftdi_operand (op, mode)
2879 rtx op;
2880 enum machine_mode mode ATTRIBUTE_UNUSED;
2882 if (TARGET_64BIT)
2883 return nonimmediate_operand (op, mode);
2884 else
2885 return register_operand (op, mode);
2888 /* Return false if this is the stack pointer, or any other fake
2889 register eliminable to the stack pointer. Otherwise, this is
2890 a register operand.
2892 This is used to prevent esp from being used as an index reg,
2893 which would only happen in pathological cases. */
2896 reg_no_sp_operand (op, mode)
2897 register rtx op;
2898 enum machine_mode mode;
2900 rtx t = op;
2901 if (GET_CODE (t) == SUBREG)
2902 t = SUBREG_REG (t);
2903 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
2904 return 0;
2906 return register_operand (op, mode);
2910 mmx_reg_operand (op, mode)
2911 register rtx op;
2912 enum machine_mode mode ATTRIBUTE_UNUSED;
2914 return MMX_REG_P (op);
2917 /* Return false if this is any eliminable register. Otherwise
2918 general_operand. */
2921 general_no_elim_operand (op, mode)
2922 register rtx op;
2923 enum machine_mode mode;
2925 rtx t = op;
2926 if (GET_CODE (t) == SUBREG)
2927 t = SUBREG_REG (t);
2928 if (t == arg_pointer_rtx || t == frame_pointer_rtx
2929 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
2930 || t == virtual_stack_dynamic_rtx)
2931 return 0;
2932 if (REG_P (t)
2933 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
2934 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
2935 return 0;
2937 return general_operand (op, mode);
2940 /* Return false if this is any eliminable register. Otherwise
2941 register_operand or const_int. */
2944 nonmemory_no_elim_operand (op, mode)
2945 register rtx op;
2946 enum machine_mode mode;
2948 rtx t = op;
2949 if (GET_CODE (t) == SUBREG)
2950 t = SUBREG_REG (t);
2951 if (t == arg_pointer_rtx || t == frame_pointer_rtx
2952 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
2953 || t == virtual_stack_dynamic_rtx)
2954 return 0;
2956 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
2959 /* Return true if op is a Q_REGS class register. */
2962 q_regs_operand (op, mode)
2963 register rtx op;
2964 enum machine_mode mode;
2966 if (mode != VOIDmode && GET_MODE (op) != mode)
2967 return 0;
2968 if (GET_CODE (op) == SUBREG)
2969 op = SUBREG_REG (op);
2970 return QI_REG_P (op);
2973 /* Return true if op is a NON_Q_REGS class register. */
2976 non_q_regs_operand (op, mode)
2977 register rtx op;
2978 enum machine_mode mode;
2980 if (mode != VOIDmode && GET_MODE (op) != mode)
2981 return 0;
2982 if (GET_CODE (op) == SUBREG)
2983 op = SUBREG_REG (op);
2984 return NON_QI_REG_P (op);
2987 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
2988 insns. */
2990 sse_comparison_operator (op, mode)
2991 rtx op;
2992 enum machine_mode mode ATTRIBUTE_UNUSED;
2994 enum rtx_code code = GET_CODE (op);
2995 switch (code)
2997 /* Operations supported directly. */
2998 case EQ:
2999 case LT:
3000 case LE:
3001 case UNORDERED:
3002 case NE:
3003 case UNGE:
3004 case UNGT:
3005 case ORDERED:
3006 return 1;
3007 /* These are equivalent to ones above in non-IEEE comparisons. */
3008 case UNEQ:
3009 case UNLT:
3010 case UNLE:
3011 case LTGT:
3012 case GE:
3013 case GT:
3014 return !TARGET_IEEE_FP;
3015 default:
3016 return 0;
3019 /* Return 1 if OP is a valid comparison operator in valid mode. */
3021 ix86_comparison_operator (op, mode)
3022 register rtx op;
3023 enum machine_mode mode;
3025 enum machine_mode inmode;
3026 enum rtx_code code = GET_CODE (op);
3027 if (mode != VOIDmode && GET_MODE (op) != mode)
3028 return 0;
3029 if (GET_RTX_CLASS (code) != '<')
3030 return 0;
3031 inmode = GET_MODE (XEXP (op, 0));
3033 if (inmode == CCFPmode || inmode == CCFPUmode)
3035 enum rtx_code second_code, bypass_code;
3036 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3037 return (bypass_code == NIL && second_code == NIL);
3039 switch (code)
3041 case EQ: case NE:
3042 return 1;
3043 case LT: case GE:
3044 if (inmode == CCmode || inmode == CCGCmode
3045 || inmode == CCGOCmode || inmode == CCNOmode)
3046 return 1;
3047 return 0;
3048 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3049 if (inmode == CCmode)
3050 return 1;
3051 return 0;
3052 case GT: case LE:
3053 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3054 return 1;
3055 return 0;
3056 default:
3057 return 0;
3061 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3064 fcmov_comparison_operator (op, mode)
3065 register rtx op;
3066 enum machine_mode mode;
3068 enum machine_mode inmode;
3069 enum rtx_code code = GET_CODE (op);
3070 if (mode != VOIDmode && GET_MODE (op) != mode)
3071 return 0;
3072 if (GET_RTX_CLASS (code) != '<')
3073 return 0;
3074 inmode = GET_MODE (XEXP (op, 0));
3075 if (inmode == CCFPmode || inmode == CCFPUmode)
3077 enum rtx_code second_code, bypass_code;
3078 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3079 if (bypass_code != NIL || second_code != NIL)
3080 return 0;
3081 code = ix86_fp_compare_code_to_integer (code);
3083 /* The i387 supports just a limited set of condition codes. */
3084 switch (code)
3086 case LTU: case GTU: case LEU: case GEU:
3087 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3088 return 1;
3089 return 0;
3090 case ORDERED: case UNORDERED:
3091 case EQ: case NE:
3092 return 1;
3093 default:
3094 return 0;
3098 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3101 promotable_binary_operator (op, mode)
3102 register rtx op;
3103 enum machine_mode mode ATTRIBUTE_UNUSED;
3105 switch (GET_CODE (op))
3107 case MULT:
3108 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3109 but the 386 and 486 do HImode multiplies faster. */
3110 return ix86_cpu > PROCESSOR_I486;
3111 case PLUS:
3112 case AND:
3113 case IOR:
3114 case XOR:
3115 case ASHIFT:
3116 return 1;
3117 default:
3118 return 0;
3122 /* Nearly general operand, but accept any const_double, since we wish
3123 to be able to drop them into memory rather than have them get pulled
3124 into registers. */
3127 cmp_fp_expander_operand (op, mode)
3128 register rtx op;
3129 enum machine_mode mode;
3131 if (mode != VOIDmode && mode != GET_MODE (op))
3132 return 0;
3133 if (GET_CODE (op) == CONST_DOUBLE)
3134 return 1;
3135 return general_operand (op, mode);
3138 /* Match an SI or HImode register for a zero_extract. */
3141 ext_register_operand (op, mode)
3142 register rtx op;
3143 enum machine_mode mode ATTRIBUTE_UNUSED;
3145 int regno;
3146 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3147 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3148 return 0;
3150 if (!register_operand (op, VOIDmode))
3151 return 0;
3153 /* Be careful to accept only registers having upper parts. */
3154 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3155 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3158 /* Return 1 if this is a valid binary floating-point operation.
3159 OP is the expression matched, and MODE is its mode. */
3162 binary_fp_operator (op, mode)
3163 register rtx op;
3164 enum machine_mode mode;
3166 if (mode != VOIDmode && mode != GET_MODE (op))
3167 return 0;
3169 switch (GET_CODE (op))
3171 case PLUS:
3172 case MINUS:
3173 case MULT:
3174 case DIV:
3175 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3177 default:
3178 return 0;
3183 mult_operator(op, mode)
3184 register rtx op;
3185 enum machine_mode mode ATTRIBUTE_UNUSED;
3187 return GET_CODE (op) == MULT;
3191 div_operator(op, mode)
3192 register rtx op;
3193 enum machine_mode mode ATTRIBUTE_UNUSED;
3195 return GET_CODE (op) == DIV;
3199 arith_or_logical_operator (op, mode)
3200 rtx op;
3201 enum machine_mode mode;
3203 return ((mode == VOIDmode || GET_MODE (op) == mode)
3204 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3205 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3208 /* Returns 1 if OP is a memory operand with a displacement. */
3211 memory_displacement_operand (op, mode)
3212 register rtx op;
3213 enum machine_mode mode;
3215 struct ix86_address parts;
3217 if (! memory_operand (op, mode))
3218 return 0;
3220 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3221 abort ();
3223 return parts.disp != NULL_RTX;
3226 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3227 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3229 ??? It seems likely that this will only work because cmpsi is an
3230 expander, and no actual insns use this. */
3233 cmpsi_operand (op, mode)
3234 rtx op;
3235 enum machine_mode mode;
3237 if (nonimmediate_operand (op, mode))
3238 return 1;
3240 if (GET_CODE (op) == AND
3241 && GET_MODE (op) == SImode
3242 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3243 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3244 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3245 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3246 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3247 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3248 return 1;
3250 return 0;
3253 /* Returns 1 if OP is a memory operand that cannot be represented by the
3254 modRM array. */
3257 long_memory_operand (op, mode)
3258 register rtx op;
3259 enum machine_mode mode;
3261 if (! memory_operand (op, mode))
3262 return 0;
3264 return memory_address_length (op) != 0;
3267 /* Return nonzero if the rtx is known aligned. */
3270 aligned_operand (op, mode)
3271 rtx op;
3272 enum machine_mode mode;
3274 struct ix86_address parts;
3276 if (!general_operand (op, mode))
3277 return 0;
3279 /* Registers and immediate operands are always "aligned". */
3280 if (GET_CODE (op) != MEM)
3281 return 1;
3283 /* Don't even try to do any aligned optimizations with volatiles. */
3284 if (MEM_VOLATILE_P (op))
3285 return 0;
3287 op = XEXP (op, 0);
3289 /* Pushes and pops are only valid on the stack pointer. */
3290 if (GET_CODE (op) == PRE_DEC
3291 || GET_CODE (op) == POST_INC)
3292 return 1;
3294 /* Decode the address. */
3295 if (! ix86_decompose_address (op, &parts))
3296 abort ();
3298 /* Look for some component that isn't known to be aligned. */
3299 if (parts.index)
3301 if (parts.scale < 4
3302 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3303 return 0;
3305 if (parts.base)
3307 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3308 return 0;
3310 if (parts.disp)
3312 if (GET_CODE (parts.disp) != CONST_INT
3313 || (INTVAL (parts.disp) & 3) != 0)
3314 return 0;
3317 /* Didn't find one -- this must be an aligned address. */
3318 return 1;
3321 /* Return true if the constant is something that can be loaded with
3322 a special instruction. Only handle 0.0 and 1.0; others are less
3323 worthwhile. */
3326 standard_80387_constant_p (x)
3327 rtx x;
3329 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3330 return -1;
3331 /* Note that the 80387 has other constants, such as pi, that we should
3332 support too. On some machines, these are much slower to load as a
3333 standard constant than to load from doubles in memory. */
3334 if (x == CONST0_RTX (GET_MODE (x)))
3335 return 1;
3336 if (x == CONST1_RTX (GET_MODE (x)))
3337 return 2;
3338 return 0;
3341 /* Return 1 if X is an FP constant we can load to an SSE register without using memory. */
3344 standard_sse_constant_p (x)
3345 rtx x;
3347 if (GET_CODE (x) != CONST_DOUBLE)
3348 return -1;
3349 return (x == CONST0_RTX (GET_MODE (x)));
3352 /* Returns 1 if OP contains a symbol reference */
3355 symbolic_reference_mentioned_p (op)
3356 rtx op;
3358 register const char *fmt;
3359 register int i;
3361 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3362 return 1;
3364 fmt = GET_RTX_FORMAT (GET_CODE (op));
3365 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3367 if (fmt[i] == 'E')
3369 register int j;
3371 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3372 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3373 return 1;
3376 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3377 return 1;
3380 return 0;
3383 /* Return 1 if it is appropriate to emit `ret' instructions in the
3384 body of a function. Do this only if the epilogue is simple, needing a
3385 couple of insns. Prior to reloading, we can't tell how many registers
3386 must be saved, so return 0 then. Return 0 if there is no frame
3387 marker to de-allocate.
3389 If NON_SAVING_SETJMP is defined and true, then it is not possible
3390 for the epilogue to be simple, so return 0. This is a special case
3391 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3392 until final, but jump_optimize may need to know sooner if a
3393 `return' is OK. */
3396 ix86_can_use_return_insn_p ()
3398 struct ix86_frame frame;
3400 #ifdef NON_SAVING_SETJMP
3401 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3402 return 0;
3403 #endif
3404 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
3405 if (profile_block_flag == 2)
3406 return 0;
3407 #endif
3409 if (! reload_completed || frame_pointer_needed)
3410 return 0;
3412 /* Don't allow more than 32k bytes to be popped, since that's all we can
3413 do with one instruction. */
3414 if (current_function_pops_args
3415 && current_function_args_size >= 32768)
3416 return 0;
3418 ix86_compute_frame_layout (&frame);
3419 return frame.to_allocate == 0 && frame.nregs == 0;
3422 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
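/* Illustrative examples (not from the original sources):
   (const_int 0x40000000) is accepted, while (const_int 0x80000000) is
   rejected, because the hardware sign-extends 32-bit immediates to 64
   bits and the latter would become 0xffffffff80000000.  */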
3424 x86_64_sign_extended_value (value)
3425 rtx value;
3427 switch (GET_CODE (value))
3429 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3430 to be at least 32, and thus all acceptable constants are
3431 represented as CONST_INT. */
3432 case CONST_INT:
3433 if (HOST_BITS_PER_WIDE_INT == 32)
3434 return 1;
3435 else
3437 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3438 return trunc_int_for_mode (val, SImode) == val;
3440 break;
3442 /* For certain code models, the symbolic references are known to fit. */
3443 case SYMBOL_REF:
3444 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3446 /* For certain code models, the code is near as well. */
3447 case LABEL_REF:
3448 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3450 /* We may also accept offsetted memory references in certain special
3451 cases. */
3452 case CONST:
3453 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3454 && XVECLEN (XEXP (value, 0), 0) == 1
3455 && XINT (XEXP (value, 0), 1) == 15)
3456 return 1;
3457 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3459 rtx op1 = XEXP (XEXP (value, 0), 0);
3460 rtx op2 = XEXP (XEXP (value, 0), 1);
3461 HOST_WIDE_INT offset;
3463 if (ix86_cmodel == CM_LARGE)
3464 return 0;
3465 if (GET_CODE (op2) != CONST_INT)
3466 return 0;
3467 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3468 switch (GET_CODE (op1))
3470 case SYMBOL_REF:
3471 /* For CM_SMALL assume that the latest object is 1MB before the
3472 end of the 31-bit boundary. We may also accept pretty
3473 large negative constants, knowing that all objects are
3474 in the positive half of the address space. */
3475 if (ix86_cmodel == CM_SMALL
3476 && offset < 1024*1024*1024
3477 && trunc_int_for_mode (offset, SImode) == offset)
3478 return 1;
3479 /* For CM_KERNEL we know that all objects reside in the
3480 negative half of the 32-bit address space. We may not
3481 accept negative offsets, since they may push the address
3482 just out of range, but we may accept pretty large positive ones. */
3483 if (ix86_cmodel == CM_KERNEL
3484 && offset > 0
3485 && trunc_int_for_mode (offset, SImode) == offset)
3486 return 1;
3487 break;
3488 case LABEL_REF:
3489 /* These conditions are similar to SYMBOL_REF ones, just the
3490 constraints for code models differ. */
3491 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3492 && offset < 1024*1024*1024
3493 && trunc_int_for_mode (offset, SImode) == offset)
3494 return 1;
3495 if (ix86_cmodel == CM_KERNEL
3496 && offset > 0
3497 && trunc_int_for_mode (offset, SImode) == offset)
3498 return 1;
3499 break;
3500 default:
3501 return 0;
3504 return 0;
3505 default:
3506 return 0;
3510 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
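/* Illustrative examples (not from the original sources):
   (const_int 0xffffffff) is accepted, since a 32-bit move zero-extends
   it for free, while (const_int -1) is rejected because its upper 32
   bits are set.  */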
3512 x86_64_zero_extended_value (value)
3513 rtx value;
3515 switch (GET_CODE (value))
3517 case CONST_DOUBLE:
3518 if (HOST_BITS_PER_WIDE_INT == 32)
3519 return (GET_MODE (value) == VOIDmode
3520 && !CONST_DOUBLE_HIGH (value));
3521 else
3522 return 0;
3523 case CONST_INT:
3524 if (HOST_BITS_PER_WIDE_INT == 32)
3525 return INTVAL (value) >= 0;
3526 else
3527 return !(INTVAL (value) & ~(HOST_WIDE_INT)0xffffffff);
3528 break;
3530 /* For certain code models, the symbolic references are known to fit. */
3531 case SYMBOL_REF:
3532 return ix86_cmodel == CM_SMALL;
3534 /* For certain code models, the code is near as well. */
3535 case LABEL_REF:
3536 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3538 /* We may also accept offsetted memory references in certain special
3539 cases. */
3540 case CONST:
3541 if (GET_CODE (XEXP (value, 0)) == PLUS)
3543 rtx op1 = XEXP (XEXP (value, 0), 0);
3544 rtx op2 = XEXP (XEXP (value, 0), 1);
3546 if (ix86_cmodel == CM_LARGE)
3547 return 0;
3548 switch (GET_CODE (op1))
3550 case SYMBOL_REF:
3551 return 0;
3552 /* For the small code model we may accept pretty large positive
3553 offsets, since one bit is available for free. Negative
3554 offsets are limited by the size of the NULL pointer area
3555 specified by the ABI. */
3556 if (ix86_cmodel == CM_SMALL
3557 && GET_CODE (op2) == CONST_INT
3558 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3559 && (trunc_int_for_mode (INTVAL (op2), SImode)
3560 == INTVAL (op2)))
3561 return 1;
3562 /* ??? For the kernel, we may accept an adjustment of
3563 -0x10000000, since we know that it will just convert
3564 negative address space to positive, but perhaps this
3565 is not worthwhile. */
3566 break;
3567 case LABEL_REF:
3568 /* These conditions are similar to SYMBOL_REF ones, just the
3569 constraints for code models differ. */
3570 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3571 && GET_CODE (op2) == CONST_INT
3572 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3573 && (trunc_int_for_mode (INTVAL (op2), SImode)
3574 == INTVAL (op2)))
3575 return 1;
3576 break;
3577 default:
3578 return 0;
3581 return 0;
3582 default:
3583 return 0;
3587 /* Value should be nonzero if functions must have frame pointers.
3588 Zero means the frame pointer need not be set up (and parms may
3589 be accessed via the stack pointer) in functions that seem suitable. */
3592 ix86_frame_pointer_required ()
3594 /* If we accessed previous frames, then the generated code expects
3595 to be able to access the saved ebp value in our frame. */
3596 if (cfun->machine->accesses_prev_frame)
3597 return 1;
3599 /* Several x86 OSes need a frame pointer for other reasons,
3600 usually pertaining to setjmp. */
3601 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3602 return 1;
3604 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3605 the frame pointer by default. Turn it back on now if we've not
3606 got a leaf function. */
3607 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3608 return 1;
3610 return 0;
3613 /* Record that the current function accesses previous call frames. */
3615 void
3616 ix86_setup_frame_addresses ()
3618 cfun->machine->accesses_prev_frame = 1;
3621 static char pic_label_name[32];
3623 /* This function generates code for -fpic that loads %ebx with
3624 the return address of the caller and then returns. */
3626 void
3627 ix86_asm_file_end (file)
3628 FILE *file;
3630 rtx xops[2];
3632 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
3633 return;
3635 /* ??? Binutils 2.10 and earlier have a linkonce elimination bug related
3636 to updating relocations to a section being discarded, such that this
3637 doesn't work. Ought to detect this at configure time. */
3638 #if 0
3639 /* The trick here is to create a linkonce section containing the
3640 pic label thunk, but to refer to it with an internal label.
3641 Because the label is internal, we don't have inter-dso name
3642 binding issues on hosts that don't support ".hidden".
3644 In order to use these macros, however, we must create a fake
3645 function decl. */
3646 if (targetm.have_named_sections)
3648 tree decl = build_decl (FUNCTION_DECL,
3649 get_identifier ("i686.get_pc_thunk"),
3650 error_mark_node);
3651 DECL_ONE_ONLY (decl) = 1;
3652 UNIQUE_SECTION (decl, 0);
3653 named_section (decl, NULL);
3655 else
3656 #else
3657 text_section ();
3658 #endif
3660 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3661 internal (non-global) label that's being emitted, it didn't make
3662 sense to have .type information for local labels. This caused
3663 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3664 me debug info for a label that you're declaring non-global?), so this
3665 was changed to call ASM_OUTPUT_LABEL() instead. */
3667 ASM_OUTPUT_LABEL (file, pic_label_name);
3669 xops[0] = pic_offset_table_rtx;
3670 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3671 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3672 output_asm_insn ("ret", xops);
3675 void
3676 load_pic_register ()
3678 rtx gotsym, pclab;
3680 if (TARGET_64BIT)
3681 abort();
3683 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3685 if (TARGET_DEEP_BRANCH_PREDICTION)
3687 if (! pic_label_name[0])
3688 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
3689 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
3691 else
3693 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
3696 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
3698 if (! TARGET_DEEP_BRANCH_PREDICTION)
3699 emit_insn (gen_popsi1 (pic_offset_table_rtx));
3701 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
3704 /* Generate a "push" pattern for input ARG. */
3706 static rtx
3707 gen_push (arg)
3708 rtx arg;
3710 return gen_rtx_SET (VOIDmode,
3711 gen_rtx_MEM (Pmode,
3712 gen_rtx_PRE_DEC (Pmode,
3713 stack_pointer_rtx)),
3714 arg);
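/* On a 32-bit target the pattern produced is, for example (a sketch):

       (set (mem:SI (pre_dec:SI (reg:SI esp))) (reg:SI eax))

   which the output machinery emits as a single "pushl %eax".  */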
3717 /* Return 1 if we need to save REGNO. */
3718 static int
3719 ix86_save_reg (regno, maybe_eh_return)
3720 int regno;
3721 int maybe_eh_return;
3723 if (flag_pic
3724 && ! TARGET_64BIT
3725 && regno == PIC_OFFSET_TABLE_REGNUM
3726 && (current_function_uses_pic_offset_table
3727 || current_function_uses_const_pool
3728 || current_function_calls_eh_return))
3729 return 1;
3731 if (current_function_calls_eh_return && maybe_eh_return)
3733 unsigned i;
3734 for (i = 0; ; i++)
3736 unsigned test = EH_RETURN_DATA_REGNO(i);
3737 if (test == INVALID_REGNUM)
3738 break;
3739 if (test == (unsigned) regno)
3740 return 1;
3744 return (regs_ever_live[regno]
3745 && !call_used_regs[regno]
3746 && !fixed_regs[regno]
3747 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3750 /* Return number of registers to be saved on the stack. */
3752 static int
3753 ix86_nsaved_regs ()
3755 int nregs = 0;
3756 int regno;
3758 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3759 if (ix86_save_reg (regno, true))
3760 nregs++;
3761 return nregs;
3764 /* Return the offset between two registers, one to be eliminated, and the other
3765 its replacement, at the start of a routine. */
3767 HOST_WIDE_INT
3768 ix86_initial_elimination_offset (from, to)
3769 int from;
3770 int to;
3772 struct ix86_frame frame;
3773 ix86_compute_frame_layout (&frame);
3775 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3776 return frame.hard_frame_pointer_offset;
3777 else if (from == FRAME_POINTER_REGNUM
3778 && to == HARD_FRAME_POINTER_REGNUM)
3779 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3780 else
3782 if (to != STACK_POINTER_REGNUM)
3783 abort ();
3784 else if (from == ARG_POINTER_REGNUM)
3785 return frame.stack_pointer_offset;
3786 else if (from != FRAME_POINTER_REGNUM)
3787 abort ();
3788 else
3789 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3793 /* Fill structure ix86_frame with information about the frame of the currently computed function. */
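/* A sketch of the layout being computed, from higher to lower
   addresses; each label marks a downward offset from the incoming
   argument pointer:

	return address / saved frame pointer
		<- hard_frame_pointer_offset
	register save area (nregs words)
	va-arg save area (va_arg_size)
	padding1
		<- frame_pointer_offset
	local variables (get_frame_size () bytes)
	outgoing arguments, padding2
		<- stack_pointer_offset (less red_zone_size on x86-64)  */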
3795 static void
3796 ix86_compute_frame_layout (frame)
3797 struct ix86_frame *frame;
3799 HOST_WIDE_INT total_size;
3800 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3801 int offset;
3802 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
3803 HOST_WIDE_INT size = get_frame_size ();
3805 frame->nregs = ix86_nsaved_regs ();
3806 total_size = size;
3808 /* Skip return address and saved base pointer. */
3809 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
3811 frame->hard_frame_pointer_offset = offset;
3813 /* Do some sanity checking of stack_alignment_needed and
3814 preferred_alignment, since the i386 port is the only one using these
3815 features and they may break easily. */
3817 if (size && !stack_alignment_needed)
3818 abort ();
3819 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
3820 abort ();
3821 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3822 abort ();
3823 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3824 abort ();
3826 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
3827 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
3829 /* Register save area */
3830 offset += frame->nregs * UNITS_PER_WORD;
3832 /* Va-arg area */
3833 if (ix86_save_varrargs_registers)
3835 offset += X86_64_VARARGS_SIZE;
3836 frame->va_arg_size = X86_64_VARARGS_SIZE;
3838 else
3839 frame->va_arg_size = 0;
3841 /* Align start of frame for local function. */
3842 frame->padding1 = ((offset + stack_alignment_needed - 1)
3843 & -stack_alignment_needed) - offset;
3845 offset += frame->padding1;
3847 /* Frame pointer points here. */
3848 frame->frame_pointer_offset = offset;
3850 offset += size;
3852 /* Add outgoing arguments area. */
3853 if (ACCUMULATE_OUTGOING_ARGS)
3855 offset += current_function_outgoing_args_size;
3856 frame->outgoing_arguments_size = current_function_outgoing_args_size;
3858 else
3859 frame->outgoing_arguments_size = 0;
3861 /* Align stack boundary. */
3862 frame->padding2 = ((offset + preferred_alignment - 1)
3863 & -preferred_alignment) - offset;
3865 offset += frame->padding2;
3867 /* We've reached the end of the stack frame. */
3868 frame->stack_pointer_offset = offset;
3870 /* Size the prologue needs to allocate. */
3871 frame->to_allocate =
3872 (size + frame->padding1 + frame->padding2
3873 + frame->outgoing_arguments_size + frame->va_arg_size);
3875 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
3876 && current_function_is_leaf)
3878 frame->red_zone_size = frame->to_allocate;
3879 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
3880 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
3882 else
3883 frame->red_zone_size = 0;
3884 frame->to_allocate -= frame->red_zone_size;
3885 frame->stack_pointer_offset -= frame->red_zone_size;
3886 #if 0
3887 fprintf (stderr, "nregs: %i\n", frame->nregs);
3888 fprintf (stderr, "size: %i\n", size);
3889 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
3890 fprintf (stderr, "padding1: %i\n", frame->padding1);
3891 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
3892 fprintf (stderr, "padding2: %i\n", frame->padding2);
3893 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
3894 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
3895 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
3896 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
3897 frame->hard_frame_pointer_offset);
3898 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
3899 #endif
3902 /* Emit code to save registers in the prologue. */
3904 static void
3905 ix86_emit_save_regs ()
3907 register int regno;
3908 rtx insn;
3910 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3911 if (ix86_save_reg (regno, true))
3913 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
3914 RTX_FRAME_RELATED_P (insn) = 1;
3918 /* Emit code to save registers using MOV insns. The first register
3919 is saved at POINTER + OFFSET. */
3920 static void
3921 ix86_emit_save_regs_using_mov (pointer, offset)
3922 rtx pointer;
3923 HOST_WIDE_INT offset;
3925 int regno;
3926 rtx insn;
3928 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3929 if (ix86_save_reg (regno, true))
3931 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
3932 Pmode, offset),
3933 gen_rtx_REG (Pmode, regno));
3934 RTX_FRAME_RELATED_P (insn) = 1;
3935 offset += UNITS_PER_WORD;
3939 /* Expand the prologue into a bunch of separate insns. */
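/* For a typical ia32 function with a frame pointer, the push-based
   variant emits, as a sketch (AT&T syntax):

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx			;; one push per saved register
	subl	$N, %esp		;; N = frame.to_allocate

   The mov-based variant instead adjusts the stack pointer once for
   registers and frame together and stores the registers with movl.  */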
3941 void
3942 ix86_expand_prologue ()
3944 rtx insn;
3945 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
3946 || current_function_uses_const_pool)
3947 && !TARGET_64BIT);
3948 struct ix86_frame frame;
3949 int use_mov = 0;
3950 HOST_WIDE_INT allocate;
3952 if (!optimize_size)
3954 use_fast_prologue_epilogue
3955 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
3956 if (TARGET_PROLOGUE_USING_MOVE)
3957 use_mov = use_fast_prologue_epilogue;
3959 ix86_compute_frame_layout (&frame);
3961 /* Note: AT&T enter does NOT have reversed args. Enter is probably
3962 slower on all targets. Also sdb doesn't like it. */
3964 if (frame_pointer_needed)
3966 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
3967 RTX_FRAME_RELATED_P (insn) = 1;
3969 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3970 RTX_FRAME_RELATED_P (insn) = 1;
3973 allocate = frame.to_allocate;
3974 /* In case we are dealing only with a single register and an empty frame,
3975 push is equivalent to the mov+add sequence. */
3976 if (allocate == 0 && frame.nregs <= 1)
3977 use_mov = 0;
3979 if (!use_mov)
3980 ix86_emit_save_regs ();
3981 else
3982 allocate += frame.nregs * UNITS_PER_WORD;
3984 if (allocate == 0)
3986 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
3988 insn = emit_insn (gen_pro_epilogue_adjust_stack
3989 (stack_pointer_rtx, stack_pointer_rtx,
3990 GEN_INT (-allocate)));
3991 RTX_FRAME_RELATED_P (insn) = 1;
3993 else
3995 /* ??? Is this only valid for Win32? */
3997 rtx arg0, sym;
3999 if (TARGET_64BIT)
4000 abort ();
4002 arg0 = gen_rtx_REG (SImode, 0);
4003 emit_move_insn (arg0, GEN_INT (allocate));
4005 sym = gen_rtx_MEM (FUNCTION_MODE,
4006 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4007 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4009 CALL_INSN_FUNCTION_USAGE (insn)
4010 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4011 CALL_INSN_FUNCTION_USAGE (insn));
4013 if (use_mov)
4015 if (!frame_pointer_needed || !frame.to_allocate)
4016 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4017 else
4018 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4019 -frame.nregs * UNITS_PER_WORD);
4022 #ifdef SUBTARGET_PROLOGUE
4023 SUBTARGET_PROLOGUE;
4024 #endif
4026 if (pic_reg_used)
4027 load_pic_register ();
4029 /* If we are profiling, make sure no instructions are scheduled before
4030 the call to mcount. However, if -fpic, the above call will have
4031 done that. */
4032 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
4033 emit_insn (gen_blockage ());
4036 /* Emit code to restore saved registers using MOV insns. First register
4037 is restored from POINTER + OFFSET. */
4038 static void
4039 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4040 rtx pointer;
4041 int offset;
4042 int maybe_eh_return;
4044 int regno;
4046 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4047 if (ix86_save_reg (regno, maybe_eh_return))
4049 emit_move_insn (gen_rtx_REG (Pmode, regno),
4050 adjust_address (gen_rtx_MEM (Pmode, pointer),
4051 Pmode, offset));
4052 offset += UNITS_PER_WORD;
4056 /* Restore function stack, frame, and registers. */
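/* A sketch of the common ia32 shapes the epilogue takes (AT&T syntax):

	with a frame pointer:	leave		;; or a movl/popl pair
	without one:		addl	$N, %esp
				popl	%ebx	;; one pop per saved reg

   either followed by ret, by ret $N when the callee pops its own
   arguments, or by an indirect jump for pops of 64K or more.  */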
4058 void
4059 ix86_expand_epilogue (style)
4060 int style;
4062 int regno;
4063 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4064 struct ix86_frame frame;
4065 HOST_WIDE_INT offset;
4067 ix86_compute_frame_layout (&frame);
4069 /* Calculate start of saved registers relative to ebp. Special care
4070 must be taken for the normal return case of a function using
4071 eh_return: the eax and edx registers are marked as saved, but not
4072 restored along this path. */
4073 offset = frame.nregs;
4074 if (current_function_calls_eh_return && style != 2)
4075 offset -= 2;
4076 offset *= -UNITS_PER_WORD;
4078 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
4079 if (profile_block_flag == 2)
4081 FUNCTION_BLOCK_PROFILER_EXIT;
4083 #endif
4085 /* If we're only restoring one register and sp is not valid, then
4086 use a move instruction to restore the register, since it's
4087 less work than reloading sp and popping the register.
4089 The default code results in a stack adjustment using add/lea instructions,
4090 while this code results in a LEAVE instruction (or discrete equivalent),
4091 so it is profitable in some other cases as well, especially when there
4092 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4093 and there is exactly one register to pop. This heuristic may need some
4094 tuning in the future. */
4095 if ((!sp_valid && frame.nregs <= 1)
4096 || (TARGET_EPILOGUE_USING_MOVE
4097 && use_fast_prologue_epilogue
4098 && (frame.nregs > 1 || frame.to_allocate))
4099 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4100 || (frame_pointer_needed && TARGET_USE_LEAVE
4101 && use_fast_prologue_epilogue && frame.nregs == 1)
4102 || current_function_calls_eh_return)
4104 /* Restore registers. We can use ebp or esp to address the memory
4105 locations. If both are available, default to ebp, since offsets
4106 are known to be small. The only exception is esp pointing directly to
4107 the end of the block of saved registers, where we may simplify the
4108 addressing mode. */
4110 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4111 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4112 frame.to_allocate, style == 2);
4113 else
4114 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4115 offset, style == 2);
4117 /* eh_return epilogues need %ecx added to the stack pointer. */
4118 if (style == 2)
4120 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4122 if (frame_pointer_needed)
4124 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4125 tmp = plus_constant (tmp, UNITS_PER_WORD);
4126 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4128 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4129 emit_move_insn (hard_frame_pointer_rtx, tmp);
4131 emit_insn (gen_pro_epilogue_adjust_stack
4132 (stack_pointer_rtx, sa, const0_rtx));
4134 else
4136 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4137 tmp = plus_constant (tmp, (frame.to_allocate
4138 + frame.nregs * UNITS_PER_WORD));
4139 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4142 else if (!frame_pointer_needed)
4143 emit_insn (gen_pro_epilogue_adjust_stack
4144 (stack_pointer_rtx, stack_pointer_rtx,
4145 GEN_INT (frame.to_allocate
4146 + frame.nregs * UNITS_PER_WORD)));
4147 /* If not an i386, mov & pop is faster than "leave". */
4148 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4149 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4150 else
4152 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4153 hard_frame_pointer_rtx,
4154 const0_rtx));
4155 if (TARGET_64BIT)
4156 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4157 else
4158 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4161 else
4163 /* The first step is to deallocate the stack frame so that we can
4164 pop the registers. */
4165 if (!sp_valid)
4167 if (!frame_pointer_needed)
4168 abort ();
4169 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4170 hard_frame_pointer_rtx,
4171 GEN_INT (offset)));
4173 else if (frame.to_allocate)
4174 emit_insn (gen_pro_epilogue_adjust_stack
4175 (stack_pointer_rtx, stack_pointer_rtx,
4176 GEN_INT (frame.to_allocate)));
4178 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4179 if (ix86_save_reg (regno, false))
4181 if (TARGET_64BIT)
4182 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4183 else
4184 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4186 if (frame_pointer_needed)
4188 /* Leave results in shorter dependency chains on CPUs that are
4189 able to grok it fast. */
4190 if (TARGET_USE_LEAVE)
4191 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4192 else if (TARGET_64BIT)
4193 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4194 else
4195 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4199 /* Sibcall epilogues don't want a return instruction. */
4200 if (style == 0)
4201 return;
4203 if (current_function_pops_args && current_function_args_size)
4205 rtx popc = GEN_INT (current_function_pops_args);
4207 /* i386 can only pop 64K bytes. If asked to pop more, pop the
4208 return address, do an explicit add, and jump indirectly to the
4209 caller. */
4211 if (current_function_pops_args >= 65536)
4213 rtx ecx = gen_rtx_REG (SImode, 2);
4215 /* There is no "pascal" calling convention in the 64bit ABI. */
4216 if (TARGET_64BIT)
4217 abort ();
4219 emit_insn (gen_popsi1 (ecx));
4220 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4221 emit_jump_insn (gen_return_indirect_internal (ecx));
4223 else
4224 emit_jump_insn (gen_return_pop_internal (popc));
4226 else
4227 emit_jump_insn (gen_return_internal ());
4230 /* Extract the parts of an RTL expression that is a valid memory address
4231 for an instruction. Return false if the structure of the address is
4232 grossly off. */
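/* Some examples of the decomposition performed below, as a sketch:

	(reg %eax)			-> base = %eax
	(plus (reg %ebx) (const_int 8))	-> base = %ebx, disp = 8
	(plus (mult (reg %eax) (const_int 4)) (reg %ebx))
					-> index = %eax, scale = 4, base = %ebx
	(ashift (reg %eax) (const_int 2))
					-> index = %eax, scale = 4  */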
4234 static int
4235 ix86_decompose_address (addr, out)
4236 register rtx addr;
4237 struct ix86_address *out;
4239 rtx base = NULL_RTX;
4240 rtx index = NULL_RTX;
4241 rtx disp = NULL_RTX;
4242 HOST_WIDE_INT scale = 1;
4243 rtx scale_rtx = NULL_RTX;
4245 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4246 base = addr;
4247 else if (GET_CODE (addr) == PLUS)
4249 rtx op0 = XEXP (addr, 0);
4250 rtx op1 = XEXP (addr, 1);
4251 enum rtx_code code0 = GET_CODE (op0);
4252 enum rtx_code code1 = GET_CODE (op1);
4254 if (code0 == REG || code0 == SUBREG)
4256 if (code1 == REG || code1 == SUBREG)
4257 index = op0, base = op1; /* index + base */
4258 else
4259 base = op0, disp = op1; /* base + displacement */
4261 else if (code0 == MULT)
4263 index = XEXP (op0, 0);
4264 scale_rtx = XEXP (op0, 1);
4265 if (code1 == REG || code1 == SUBREG)
4266 base = op1; /* index*scale + base */
4267 else
4268 disp = op1; /* index*scale + disp */
4270 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4272 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4273 scale_rtx = XEXP (XEXP (op0, 0), 1);
4274 base = XEXP (op0, 1);
4275 disp = op1;
4277 else if (code0 == PLUS)
4279 index = XEXP (op0, 0); /* index + base + disp */
4280 base = XEXP (op0, 1);
4281 disp = op1;
4283 else
4284 return FALSE;
4286 else if (GET_CODE (addr) == MULT)
4288 index = XEXP (addr, 0); /* index*scale */
4289 scale_rtx = XEXP (addr, 1);
4291 else if (GET_CODE (addr) == ASHIFT)
4293 rtx tmp;
4295 /* We're called for lea too, which implements ashift on occasion. */
4296 index = XEXP (addr, 0);
4297 tmp = XEXP (addr, 1);
4298 if (GET_CODE (tmp) != CONST_INT)
4299 return FALSE;
4300 scale = INTVAL (tmp);
4301 if ((unsigned HOST_WIDE_INT) scale > 3)
4302 return FALSE;
4303 scale = 1 << scale;
4305 else
4306 disp = addr; /* displacement */
4308 /* Extract the integral value of scale. */
4309 if (scale_rtx)
4311 if (GET_CODE (scale_rtx) != CONST_INT)
4312 return FALSE;
4313 scale = INTVAL (scale_rtx);
4316 /* Allow arg pointer and stack pointer as index if there is no scaling. */
4317 if (base && index && scale == 1
4318 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4319 || index == stack_pointer_rtx))
4321 rtx tmp = base;
4322 base = index;
4323 index = tmp;
4326 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4327 if ((base == hard_frame_pointer_rtx
4328 || base == frame_pointer_rtx
4329 || base == arg_pointer_rtx) && !disp)
4330 disp = const0_rtx;
4332 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4333 Avoid this by transforming to [%esi+0]. */
4334 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4335 && base && !index && !disp
4336 && REG_P (base)
4337 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4338 disp = const0_rtx;
4340 /* Special case: encode reg+reg instead of reg*2. */
4341 if (!base && index && scale && scale == 2)
4342 base = index, scale = 1;
4344 /* Special case: scaling cannot be encoded without base or displacement. */
4345 if (!base && !disp && index && scale != 1)
4346 disp = const0_rtx;
4348 out->base = base;
4349 out->index = index;
4350 out->disp = disp;
4351 out->scale = scale;
4353 return TRUE;
4356 /* Return cost of the memory address x.
4357 For i386, it is better to use a complex address than let gcc copy
4358 the address into a reg and make a new pseudo. But not if the address
4359 requires two regs - that would mean more pseudos with longer
4360 lifetimes. */
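/* E.g., as a sketch with hard registers assumed: [%ebx] costs 1,
   [%ebx+8] costs 0 (complex references are preferred), while an
   address built from two distinct pseudo registers costs 3.  */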
4362 ix86_address_cost (x)
4363 rtx x;
4365 struct ix86_address parts;
4366 int cost = 1;
4368 if (!ix86_decompose_address (x, &parts))
4369 abort ();
4371 /* More complex memory references are better. */
4372 if (parts.disp && parts.disp != const0_rtx)
4373 cost--;
4375 /* Attempt to minimize number of registers in the address. */
4376 if ((parts.base
4377 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4378 || (parts.index
4379 && (!REG_P (parts.index)
4380 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4381 cost++;
4383 if (parts.base
4384 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4385 && parts.index
4386 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4387 && parts.base != parts.index)
4388 cost++;
4390 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4391 since its predecode logic can't detect the length of instructions,
4392 and decoding degenerates to the vector decoder. Increase the cost of such
4393 addresses here. The penalty is at least 2 cycles. It may be worthwhile
4394 to split such addresses, or even to refuse them altogether.
4396 The following addressing modes are affected:
4397 [base+scale*index]
4398 [scale*index+disp]
4399 [base+index]
4401 The first and last case may be avoidable by explicitly coding the zero in
4402 the memory address, but I don't have an AMD-K6 machine handy to check this
4403 theory. */
4405 if (TARGET_K6
4406 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4407 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4408 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4409 cost += 10;
4411 return cost;
4414 /* If X is a machine specific address (i.e. a symbol or label being
4415 referenced as a displacement from the GOT implemented using an
4416 UNSPEC), then return the base term. Otherwise return X. */
4419 ix86_find_base_term (x)
4420 rtx x;
4422 rtx term;
4424 if (TARGET_64BIT)
4426 if (GET_CODE (x) != CONST)
4427 return x;
4428 term = XEXP (x, 0);
4429 if (GET_CODE (term) == PLUS
4430 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4431 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4432 term = XEXP (term, 0);
4433 if (GET_CODE (term) != UNSPEC
4434 || XVECLEN (term, 0) != 1
4435 || XINT (term, 1) != 15)
4436 return x;
4438 term = XVECEXP (term, 0, 0);
4440 if (GET_CODE (term) != SYMBOL_REF
4441 && GET_CODE (term) != LABEL_REF)
4442 return x;
4444 return term;
4447 if (GET_CODE (x) != PLUS
4448 || XEXP (x, 0) != pic_offset_table_rtx
4449 || GET_CODE (XEXP (x, 1)) != CONST)
4450 return x;
4452 term = XEXP (XEXP (x, 1), 0);
4454 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4455 term = XEXP (term, 0);
4457 if (GET_CODE (term) != UNSPEC
4458 || XVECLEN (term, 0) != 1
4459 || XINT (term, 1) != 7)
4460 return x;
4462 term = XVECEXP (term, 0, 0);
4464 if (GET_CODE (term) != SYMBOL_REF
4465 && GET_CODE (term) != LABEL_REF)
4466 return x;
4468 return term;
4471 /* Determine if a given CONST RTX is a valid memory displacement
4472 in PIC mode. */
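/* For the ia32 case, e.g. (a sketch):
	(const (unspec [(symbol_ref "x")] 6))		; x@GOT
	(const (unspec [(symbol_ref "y")] 7))		; y@GOTOFF
	(const (plus (unspec [(symbol_ref "y")] 7) (const_int 4)))
   are accepted (the last with a constant offset); other forms are
   rejected.  */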
4475 legitimate_pic_address_disp_p (disp)
4476 register rtx disp;
4478 /* In 64bit mode we can allow direct addresses of symbols and labels
4479 when they are not dynamic symbols. */
4480 if (TARGET_64BIT)
4482 rtx x = disp;
4483 if (GET_CODE (disp) == CONST)
4484 x = XEXP (disp, 0);
4485 /* ??? Handle PIC code models */
4486 if (GET_CODE (x) == PLUS
4487 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4488 && ix86_cmodel == CM_SMALL_PIC
4489 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4490 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4491 x = XEXP (x, 0);
4492 if (local_symbolic_operand (x, Pmode))
4493 return 1;
4495 if (GET_CODE (disp) != CONST)
4496 return 0;
4497 disp = XEXP (disp, 0);
4499 if (TARGET_64BIT)
4501 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
4502 of GOT table references. We should not need these anyway. */
4503 if (GET_CODE (disp) != UNSPEC
4504 || XVECLEN (disp, 0) != 1
4505 || XINT (disp, 1) != 15)
4506 return 0;
4508 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4509 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4510 return 0;
4511 return 1;
4514 if (GET_CODE (disp) == PLUS)
4516 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4517 return 0;
4518 disp = XEXP (disp, 0);
4521 if (GET_CODE (disp) != UNSPEC
4522 || XVECLEN (disp, 0) != 1)
4523 return 0;
4525 /* Must be @GOT or @GOTOFF. */
4526 switch (XINT (disp, 1))
4528 case 6: /* @GOT */
4529 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4531 case 7: /* @GOTOFF */
4532 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4535 return 0;
4538 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4539 memory address for an instruction. The MODE argument is the machine mode
4540 for the MEM expression that wants to use this address.
4542 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
4543 convert common non-canonical forms to canonical form so that they will
4544 be recognized. */
4547 legitimate_address_p (mode, addr, strict)
4548 enum machine_mode mode;
4549 register rtx addr;
4550 int strict;
4552 struct ix86_address parts;
4553 rtx base, index, disp;
4554 HOST_WIDE_INT scale;
4555 const char *reason = NULL;
4556 rtx reason_rtx = NULL_RTX;
4558 if (TARGET_DEBUG_ADDR)
4560 fprintf (stderr,
4561 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4562 GET_MODE_NAME (mode), strict);
4563 debug_rtx (addr);
4566 if (! ix86_decompose_address (addr, &parts))
4568 reason = "decomposition failed";
4569 goto report_error;
4572 base = parts.base;
4573 index = parts.index;
4574 disp = parts.disp;
4575 scale = parts.scale;
4577 /* Validate base register.
4579 Don't allow SUBREGs here; they can lead to spill failures when the base
4580 is one word out of a two-word structure, which is represented internally
4581 as a DImode int. */
4583 if (base)
4585 reason_rtx = base;
4587 if (GET_CODE (base) != REG)
4589 reason = "base is not a register";
4590 goto report_error;
4593 if (GET_MODE (base) != Pmode)
4595 reason = "base is not in Pmode";
4596 goto report_error;
4599 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
4600 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
4602 reason = "base is not valid";
4603 goto report_error;
4607 /* Validate index register.
4609 Don't allow SUBREGs here; they can lead to spill failures when the index
4610 is one word out of a two-word structure, which is represented internally
4611 as a DImode int. */
4613 if (index)
4615 reason_rtx = index;
4617 if (GET_CODE (index) != REG)
4619 reason = "index is not a register";
4620 goto report_error;
4623 if (GET_MODE (index) != Pmode)
4625 reason = "index is not in Pmode";
4626 goto report_error;
4629 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
4630 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
4632 reason = "index is not valid";
4633 goto report_error;
4637 /* Validate scale factor. */
4638 if (scale != 1)
4640 reason_rtx = GEN_INT (scale);
4641 if (!index)
4643 reason = "scale without index";
4644 goto report_error;
4647 if (scale != 2 && scale != 4 && scale != 8)
4649 reason = "scale is not a valid multiplier";
4650 goto report_error;
4654 /* Validate displacement. */
4655 if (disp)
4657 reason_rtx = disp;
4659 if (!CONSTANT_ADDRESS_P (disp))
4661 reason = "displacement is not constant";
4662 goto report_error;
4665 if (TARGET_64BIT)
4667 if (!x86_64_sign_extended_value (disp))
4669 reason = "displacement is out of range";
4670 goto report_error;
4673 else
4675 if (GET_CODE (disp) == CONST_DOUBLE)
4677 reason = "displacement is a const_double";
4678 goto report_error;
4682 if (flag_pic && SYMBOLIC_CONST (disp))
4684 if (TARGET_64BIT && (index || base))
4686 reason = "non-constant pic memory reference";
4687 goto report_error;
4689 if (! legitimate_pic_address_disp_p (disp))
4691 reason = "displacement is an invalid pic construct";
4692 goto report_error;
4695 /* This code used to verify that a symbolic pic displacement
4696 includes the pic_offset_table_rtx register.
4698 While this is a good idea, unfortunately these constructs may
4699 be created by the "adds using lea" optimization for incorrect
4700 code like:
4702 int a;
4703 int foo(int i)
4705 return *(&a+i);
4708 This code is nonsensical, but results in addressing the
4709 GOT table with a pic_offset_table_rtx base. We can't
4710 just refuse it easily, since it gets matched by the
4711 "addsi3" pattern, which later gets split to lea in the
4712 case where the output register differs from the input. While this
4713 could be handled by a separate addsi pattern for this case
4714 that never results in lea, disabling this test seems to be the
4715 easier and correct fix for the crash. */
4717 else if (HALF_PIC_P ())
4719 if (! HALF_PIC_ADDRESS_P (disp)
4720 || (base != NULL_RTX || index != NULL_RTX))
4722 reason = "displacement is an invalid half-pic reference";
4723 goto report_error;
4728 /* Everything looks valid. */
4729 if (TARGET_DEBUG_ADDR)
4730 fprintf (stderr, "Success.\n");
4731 return TRUE;
4733 report_error:
4734 if (TARGET_DEBUG_ADDR)
4736 fprintf (stderr, "Error: %s\n", reason);
4737 debug_rtx (reason_rtx);
4739 return FALSE;
4742 /* Return a unique alias set for the GOT. */
4744 static HOST_WIDE_INT
4745 ix86_GOT_alias_set ()
4747 static HOST_WIDE_INT set = -1;
4748 if (set == -1)
4749 set = new_alias_set ();
4750 return set;
4753 /* Return a legitimate reference for ORIG (an address) using the
4754 register REG. If REG is 0, a new pseudo is generated.
4756 There are two types of references that must be handled:
4758 1. Global data references must load the address from the GOT, via
4759 the PIC reg. An insn is emitted to do this load, and the reg is
4760 returned.
4762 2. Static data references, constant pool addresses, and code labels
4763 compute the address as an offset from the GOT, whose base is in
4764 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
4765 differentiate them from global data objects. The returned
4766 address is the PIC reg + an unspec constant.
4768 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
4769 reg also appears in the address. */
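/* For example on ia32, as a sketch: a global `x' becomes

	(mem (plus (reg pic) (const (unspec [(symbol_ref "x")] 6))))

   i.e. a load from x@GOT(%ebx), while a local/static `y' becomes

	(plus (reg pic) (const (unspec [(symbol_ref "y")] 7)))

   i.e. the address y@GOTOFF(%ebx).  */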
4772 legitimize_pic_address (orig, reg)
4773 rtx orig;
4774 rtx reg;
4776 rtx addr = orig;
4777 rtx new = orig;
4778 rtx base;
4780 if (local_symbolic_operand (addr, Pmode))
4782 /* In 64bit mode we can address such objects directly. */
4783 if (TARGET_64BIT)
4784 new = addr;
4785 else
4787 /* This symbol may be referenced via a displacement from the PIC
4788 base address (@GOTOFF). */
4790 current_function_uses_pic_offset_table = 1;
4791 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
4792 new = gen_rtx_CONST (Pmode, new);
4793 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4795 if (reg != 0)
4797 emit_move_insn (reg, new);
4798 new = reg;
4802 else if (GET_CODE (addr) == SYMBOL_REF)
4804 if (TARGET_64BIT)
4806 current_function_uses_pic_offset_table = 1;
4807 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15);
4808 new = gen_rtx_CONST (Pmode, new);
4809 new = gen_rtx_MEM (Pmode, new);
4810 RTX_UNCHANGING_P (new) = 1;
4811 set_mem_alias_set (new, ix86_GOT_alias_set ());
4813 if (reg == 0)
4814 reg = gen_reg_rtx (Pmode);
4815 /* Use gen_movsi directly; otherwise the address is loaded
4816 into a register for CSE. We don't want to CSE these addresses;
4817 instead we CSE addresses from the GOT table, so skip this. */
4818 emit_insn (gen_movsi (reg, new));
4819 new = reg;
4821 else
4823 /* This symbol must be referenced via a load from the
4824 Global Offset Table (@GOT). */
4826 current_function_uses_pic_offset_table = 1;
4827 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
4828 new = gen_rtx_CONST (Pmode, new);
4829 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4830 new = gen_rtx_MEM (Pmode, new);
4831 RTX_UNCHANGING_P (new) = 1;
4832 set_mem_alias_set (new, ix86_GOT_alias_set ());
4834 if (reg == 0)
4835 reg = gen_reg_rtx (Pmode);
4836 emit_move_insn (reg, new);
4837 new = reg;
4840 else
4842 if (GET_CODE (addr) == CONST)
4844 addr = XEXP (addr, 0);
4845 if (GET_CODE (addr) == UNSPEC)
4847 /* Check that the unspec is one of the ones we generate? */
4849 else if (GET_CODE (addr) != PLUS)
4850 abort ();
4852 if (GET_CODE (addr) == PLUS)
4854 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
4856 /* Check first to see if this is a constant offset from a @GOTOFF
4857 symbol reference. */
4858 if (local_symbolic_operand (op0, Pmode)
4859 && GET_CODE (op1) == CONST_INT)
4861 if (!TARGET_64BIT)
4863 current_function_uses_pic_offset_table = 1;
4864 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
4865 new = gen_rtx_PLUS (Pmode, new, op1);
4866 new = gen_rtx_CONST (Pmode, new);
4867 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4869 if (reg != 0)
4871 emit_move_insn (reg, new);
4872 new = reg;
4875 else
4877 /* ??? We need to limit offsets here. */
4880 else
4882 base = legitimize_pic_address (XEXP (addr, 0), reg);
4883 new = legitimize_pic_address (XEXP (addr, 1),
4884 base == reg ? NULL_RTX : reg);
4886 if (GET_CODE (new) == CONST_INT)
4887 new = plus_constant (base, INTVAL (new));
4888 else
4890 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
4892 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
4893 new = XEXP (new, 1);
4895 new = gen_rtx_PLUS (Pmode, base, new);
4900 return new;
4903 /* Try machine-dependent ways of modifying an illegitimate address
4904 to be legitimate. If we find one, return the new, valid address.
4905 This macro is used in only one place: `memory_address' in explow.c.
4907 OLDX is the address as it was before break_out_memory_refs was called.
4908 In some cases it is useful to look at this to decide what needs to be done.
4910 MODE and WIN are passed so that this macro can use
4911 GO_IF_LEGITIMATE_ADDRESS.
4913 It is always safe for this macro to do nothing. It exists to recognize
4914 opportunities to optimize the output.
4916 For the 80386, we handle X+REG by loading X into a register R and
4917 using R+REG. R will go in a general reg and indexing will be used.
4918 However, if REG is a broken-out memory address or multiplication,
4919 nothing needs to be done because REG can certainly go in a general reg.
4921 When -fpic is used, special handling is needed for symbolic references.
4922 See comments by legitimize_pic_address in i386.c for details. */
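/* E.g., as a sketch: given (plus (symbol_ref "x") (reg)), the code
   below forces the symbol into a fresh register R and returns
   (plus R (reg)), so base+index addressing can be used.  */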
4925 legitimize_address (x, oldx, mode)
4926 register rtx x;
4927 register rtx oldx ATTRIBUTE_UNUSED;
4928 enum machine_mode mode;
4930 int changed = 0;
4931 unsigned log;
4933 if (TARGET_DEBUG_ADDR)
4935 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
4936 GET_MODE_NAME (mode));
4937 debug_rtx (x);
4940 if (flag_pic && SYMBOLIC_CONST (x))
4941 return legitimize_pic_address (x, 0);
4943 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
4944 if (GET_CODE (x) == ASHIFT
4945 && GET_CODE (XEXP (x, 1)) == CONST_INT
4946 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
4948 changed = 1;
4949 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
4950 GEN_INT (1 << log));
4953 if (GET_CODE (x) == PLUS)
4955 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
4957 if (GET_CODE (XEXP (x, 0)) == ASHIFT
4958 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4959 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
4961 changed = 1;
4962 XEXP (x, 0) = gen_rtx_MULT (Pmode,
4963 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
4964 GEN_INT (1 << log));
4967 if (GET_CODE (XEXP (x, 1)) == ASHIFT
4968 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4969 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
4971 changed = 1;
4972 XEXP (x, 1) = gen_rtx_MULT (Pmode,
4973 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
4974 GEN_INT (1 << log));
4977 /* Put multiply first if it isn't already. */
4978 if (GET_CODE (XEXP (x, 1)) == MULT)
4980 rtx tmp = XEXP (x, 0);
4981 XEXP (x, 0) = XEXP (x, 1);
4982 XEXP (x, 1) = tmp;
4983 changed = 1;
4986 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
4987 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
4988 created by virtual register instantiation, register elimination, and
4989 similar optimizations. */
4990 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
4992 changed = 1;
4993 x = gen_rtx_PLUS (Pmode,
4994 gen_rtx_PLUS (Pmode, XEXP (x, 0),
4995 XEXP (XEXP (x, 1), 0)),
4996 XEXP (XEXP (x, 1), 1));
4999 /* Canonicalize
5000 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5001 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5002 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5003 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5004 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5005 && CONSTANT_P (XEXP (x, 1)))
5007 rtx constant;
5008 rtx other = NULL_RTX;
5010 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5012 constant = XEXP (x, 1);
5013 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5015 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5017 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5018 other = XEXP (x, 1);
5020 else
5021 constant = 0;
5023 if (constant)
5025 changed = 1;
5026 x = gen_rtx_PLUS (Pmode,
5027 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5028 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5029 plus_constant (other, INTVAL (constant)));
5033 if (changed && legitimate_address_p (mode, x, FALSE))
5034 return x;
5036 if (GET_CODE (XEXP (x, 0)) == MULT)
5038 changed = 1;
5039 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5042 if (GET_CODE (XEXP (x, 1)) == MULT)
5044 changed = 1;
5045 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5048 if (changed
5049 && GET_CODE (XEXP (x, 1)) == REG
5050 && GET_CODE (XEXP (x, 0)) == REG)
5051 return x;
5053 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5055 changed = 1;
5056 x = legitimize_pic_address (x, 0);
5059 if (changed && legitimate_address_p (mode, x, FALSE))
5060 return x;
5062 if (GET_CODE (XEXP (x, 0)) == REG)
5064 register rtx temp = gen_reg_rtx (Pmode);
5065 register rtx val = force_operand (XEXP (x, 1), temp);
5066 if (val != temp)
5067 emit_move_insn (temp, val);
5069 XEXP (x, 1) = temp;
5070 return x;
5073 else if (GET_CODE (XEXP (x, 1)) == REG)
5075 register rtx temp = gen_reg_rtx (Pmode);
5076 register rtx val = force_operand (XEXP (x, 0), temp);
5077 if (val != temp)
5078 emit_move_insn (temp, val);
5080 XEXP (x, 0) = temp;
5081 return x;
5085 return x;
5088 /* Print an integer constant expression in assembler syntax. Addition
5089 and subtraction are the only arithmetic that may appear in these
5090 expressions. FILE is the stdio stream to write to, X is the rtx, and
5091 CODE is the operand print code from the output string. */
5093 static void
5094 output_pic_addr_const (file, x, code)
5095 FILE *file;
5096 rtx x;
5097 int code;
5099 char buf[256];
5101 switch (GET_CODE (x))
5103 case PC:
5104 if (flag_pic)
5105 putc ('.', file);
5106 else
5107 abort ();
5108 break;
5110 case SYMBOL_REF:
5111 assemble_name (file, XSTR (x, 0));
5112 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5113 fputs ("@PLT", file);
5114 break;
5116 case LABEL_REF:
5117 x = XEXP (x, 0);
5118 /* FALLTHRU */
5119 case CODE_LABEL:
5120 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5121 assemble_name (asm_out_file, buf);
5122 break;
5124 case CONST_INT:
5125 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5126 break;
5128 case CONST:
5129 /* This used to output parentheses around the expression,
5130 but that does not work on the 386 (either ATT or BSD assembler). */
5131 output_pic_addr_const (file, XEXP (x, 0), code);
5132 break;
5134 case CONST_DOUBLE:
5135 if (GET_MODE (x) == VOIDmode)
5137 /* We can use %d if the number is <32 bits and positive. */
5138 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5139 fprintf (file, "0x%lx%08lx",
5140 (unsigned long) CONST_DOUBLE_HIGH (x),
5141 (unsigned long) CONST_DOUBLE_LOW (x));
5142 else
5143 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5145 else
5146 /* We can't handle floating point constants;
5147 PRINT_OPERAND must handle them. */
5148 output_operand_lossage ("floating constant misused");
5149 break;
5151 case PLUS:
5152 /* Some assemblers need integer constants to appear first. */
5153 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5155 output_pic_addr_const (file, XEXP (x, 0), code);
5156 putc ('+', file);
5157 output_pic_addr_const (file, XEXP (x, 1), code);
5159 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5161 output_pic_addr_const (file, XEXP (x, 1), code);
5162 putc ('+', file);
5163 output_pic_addr_const (file, XEXP (x, 0), code);
5165 else
5166 abort ();
5167 break;
5169 case MINUS:
5170 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
5171 output_pic_addr_const (file, XEXP (x, 0), code);
5172 putc ('-', file);
5173 output_pic_addr_const (file, XEXP (x, 1), code);
5174 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
5175 break;
5177 case UNSPEC:
5178 if (XVECLEN (x, 0) != 1)
5179 abort ();
5180 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5181 switch (XINT (x, 1))
5183 case 6:
5184 fputs ("@GOT", file);
5185 break;
5186 case 7:
5187 fputs ("@GOTOFF", file);
5188 break;
5189 case 8:
5190 fputs ("@PLT", file);
5191 break;
5192 case 15:
5193 fputs ("@GOTPCREL(%RIP)", file);
5194 break;
5195 default:
5196 output_operand_lossage ("invalid UNSPEC as operand");
5197 break;
5199 break;
5201 default:
5202 output_operand_lossage ("invalid expression as operand");
5206 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5207 We need to handle our special PIC relocations. */
5209 void
5210 i386_dwarf_output_addr_const (file, x)
5211 FILE *file;
5212 rtx x;
5214 #ifdef ASM_QUAD
5215 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5216 #else
5217 if (TARGET_64BIT)
5218 abort ();
5219 fprintf (file, "%s", ASM_LONG);
5220 #endif
5221 if (flag_pic)
5222 output_pic_addr_const (file, x, '\0');
5223 else
5224 output_addr_const (file, x);
5225 fputc ('\n', file);
5228 /* In the name of slightly smaller debug output, and to cater to
5229 general assembler lossage, recognize PIC+GOTOFF and turn it back
5230 into a direct symbol reference. */
5233 i386_simplify_dwarf_addr (orig_x)
5234 rtx orig_x;
5236 rtx x = orig_x;
5238 if (TARGET_64BIT)
5240 if (GET_CODE (x) != CONST
5241 || GET_CODE (XEXP (x, 0)) != UNSPEC
5242 || XINT (XEXP (x, 0), 1) != 15)
5243 return orig_x;
5244 return XVECEXP (XEXP (x, 0), 0, 0);
5247 if (GET_CODE (x) != PLUS
5248 || GET_CODE (XEXP (x, 0)) != REG
5249 || GET_CODE (XEXP (x, 1)) != CONST)
5250 return orig_x;
5252 x = XEXP (XEXP (x, 1), 0);
5253 if (GET_CODE (x) == UNSPEC
5254 && (XINT (x, 1) == 6
5255 || XINT (x, 1) == 7))
5256 return XVECEXP (x, 0, 0);
5258 if (GET_CODE (x) == PLUS
5259 && GET_CODE (XEXP (x, 0)) == UNSPEC
5260 && GET_CODE (XEXP (x, 1)) == CONST_INT
5261 && (XINT (XEXP (x, 0), 1) == 6
5262 || XINT (XEXP (x, 0), 1) == 7))
5263 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5265 return orig_x;
5268 static void
5269 put_condition_code (code, mode, reverse, fp, file)
5270 enum rtx_code code;
5271 enum machine_mode mode;
5272 int reverse, fp;
5273 FILE *file;
5275 const char *suffix;
5277 if (mode == CCFPmode || mode == CCFPUmode)
5279 enum rtx_code second_code, bypass_code;
5280 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5281 if (bypass_code != NIL || second_code != NIL)
5282 abort ();
5283 code = ix86_fp_compare_code_to_integer (code);
5284 mode = CCmode;
5286 if (reverse)
5287 code = reverse_condition (code);
5289 switch (code)
5291 case EQ:
5292 suffix = "e";
5293 break;
5294 case NE:
5295 suffix = "ne";
5296 break;
5297 case GT:
5298 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
5299 abort ();
5300 suffix = "g";
5301 break;
5302 case GTU:
5303 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
5304 Those same assemblers have the same but opposite lossage on cmov. */
5305 if (mode != CCmode)
5306 abort ();
5307 suffix = fp ? "nbe" : "a";
5308 break;
5309 case LT:
5310 if (mode == CCNOmode || mode == CCGOCmode)
5311 suffix = "s";
5312 else if (mode == CCmode || mode == CCGCmode)
5313 suffix = "l";
5314 else
5315 abort ();
5316 break;
5317 case LTU:
5318 if (mode != CCmode)
5319 abort ();
5320 suffix = "b";
5321 break;
5322 case GE:
5323 if (mode == CCNOmode || mode == CCGOCmode)
5324 suffix = "ns";
5325 else if (mode == CCmode || mode == CCGCmode)
5326 suffix = "ge";
5327 else
5328 abort ();
5329 break;
5330 case GEU:
5331 /* ??? As above. */
5332 if (mode != CCmode)
5333 abort ();
5334 suffix = fp ? "nb" : "ae";
5335 break;
5336 case LE:
5337 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
5338 abort ();
5339 suffix = "le";
5340 break;
5341 case LEU:
5342 if (mode != CCmode)
5343 abort ();
5344 suffix = "be";
5345 break;
5346 case UNORDERED:
5347 suffix = fp ? "u" : "p";
5348 break;
5349 case ORDERED:
5350 suffix = fp ? "nu" : "np";
5351 break;
5352 default:
5353 abort ();
5355 fputs (suffix, file);
5358 void
5359 print_reg (x, code, file)
5360 rtx x;
5361 int code;
5362 FILE *file;
5364 if (REGNO (x) == ARG_POINTER_REGNUM
5365 || REGNO (x) == FRAME_POINTER_REGNUM
5366 || REGNO (x) == FLAGS_REG
5367 || REGNO (x) == FPSR_REG)
5368 abort ();
5370 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
5371 putc ('%', file);
5373 if (code == 'w' || MMX_REG_P (x))
5374 code = 2;
5375 else if (code == 'b')
5376 code = 1;
5377 else if (code == 'k')
5378 code = 4;
5379 else if (code == 'q')
5380 code = 8;
5381 else if (code == 'y')
5382 code = 3;
5383 else if (code == 'h')
5384 code = 0;
5385 else
5386 code = GET_MODE_SIZE (GET_MODE (x));
5388 /* Irritatingly, AMD extended registers use a different naming convention
5389 from the normal registers. */
5390 if (REX_INT_REG_P (x))
5392 if (!TARGET_64BIT)
5393 abort ();
5394 switch (code)
5396 case 0:
5397 error ("extended registers have no high halves");
5398 break;
5399 case 1:
5400 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5401 break;
5402 case 2:
5403 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5404 break;
5405 case 4:
5406 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5407 break;
5408 case 8:
5409 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5410 break;
5411 default:
5412 error ("unsupported operand size for extended register");
5413 break;
5415 return;
5417 switch (code)
5419 case 3:
5420 if (STACK_TOP_P (x))
5422 fputs ("st(0)", file);
5423 break;
5425 /* FALLTHRU */
5426 case 8:
5427 case 4:
5428 case 12:
5429 if (! ANY_FP_REG_P (x))
5430 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5431 /* FALLTHRU */
5432 case 16:
5433 case 2:
5434 fputs (hi_reg_name[REGNO (x)], file);
5435 break;
5436 case 1:
5437 fputs (qi_reg_name[REGNO (x)], file);
5438 break;
5439 case 0:
5440 fputs (qi_high_reg_name[REGNO (x)], file);
5441 break;
5442 default:
5443 abort ();
5447 /* Meaning of CODE:
5448 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5449 C -- print opcode suffix for set/cmov insn.
5450 c -- like C, but print reversed condition
5451 F,f -- likewise, but for floating-point.
5452 R -- print the prefix for register names.
5453 z -- print the opcode suffix for the size of the current operand.
5454 * -- print a star (in certain assembler syntax)
5455 A -- print an absolute memory reference.
5456 w -- print the operand as if it's a "word" (HImode) even if it isn't.
5457 s -- print a shift double count, followed by the assembler's argument
5458 delimiter.
5459 b -- print the QImode name of the register for the indicated operand.
5460 %b0 would print %al if operands[0] is reg 0.
5461 w -- likewise, print the HImode name of the register.
5462 k -- likewise, print the SImode name of the register.
5463 q -- likewise, print the DImode name of the register.
5464 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5465 y -- print "st(0)" instead of "st" as a register.
5466 D -- print condition for SSE cmp instruction.
5467 P -- if PIC, print an @PLT suffix.
5468 X -- don't print any sort of PIC '@' suffix for a symbol.
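/* For example, if operands[0] is (reg:SI ax), then as a sketch
   %k0 prints %eax, %w0 prints %ax, %b0 prints %al and %h0 prints %ah,
   while plain %0 picks the suffix from the operand's mode.  */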
5471 void
5472 print_operand (file, x, code)
5473 FILE *file;
5474 rtx x;
5475 int code;
5477 if (code)
5479 switch (code)
5481 case '*':
5482 if (ASSEMBLER_DIALECT == 0)
5483 putc ('*', file);
5484 return;
5486 case 'A':
5487 if (ASSEMBLER_DIALECT == 0)
5488 putc ('*', file);
5489 else if (ASSEMBLER_DIALECT == 1)
5491 /* Intel syntax. For absolute addresses, registers should not
5492 be surrounded by brackets. */
5493 if (GET_CODE (x) != REG)
5495 putc ('[', file);
5496 PRINT_OPERAND (file, x, 0);
5497 putc (']', file);
5498 return;
5502 PRINT_OPERAND (file, x, 0);
5503 return;
5506 case 'L':
5507 if (ASSEMBLER_DIALECT == 0)
5508 putc ('l', file);
5509 return;
5511 case 'W':
5512 if (ASSEMBLER_DIALECT == 0)
5513 putc ('w', file);
5514 return;
5516 case 'B':
5517 if (ASSEMBLER_DIALECT == 0)
5518 putc ('b', file);
5519 return;
5521 case 'Q':
5522 if (ASSEMBLER_DIALECT == 0)
5523 putc ('l', file);
5524 return;
5526 case 'S':
5527 if (ASSEMBLER_DIALECT == 0)
5528 putc ('s', file);
5529 return;
5531 case 'T':
5532 if (ASSEMBLER_DIALECT == 0)
5533 putc ('t', file);
5534 return;
5536 case 'z':
5537 /* 387 opcodes don't get size suffixes if the operands are
5538 registers. */
5540 if (STACK_REG_P (x))
5541 return;
5543 /* Derive the opcode suffix from the size of the operand. */
5544 switch (GET_MODE_SIZE (GET_MODE (x)))
5546 case 2:
5547 #ifdef HAVE_GAS_FILDS_FISTS
5548 putc ('s', file);
5549 #endif
5550 return;
5552 case 4:
5553 if (GET_MODE (x) == SFmode)
5555 putc ('s', file);
5556 return;
5558 else
5559 putc ('l', file);
5560 return;
5562 case 12:
5563 case 16:
5564 putc ('t', file);
5565 return;
5567 case 8:
5568 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5570 #ifdef GAS_MNEMONICS
5571 putc ('q', file);
5572 #else
5573 putc ('l', file);
5574 putc ('l', file);
5575 #endif
5577 else
5578 putc ('l', file);
5579 return;
5581 default:
5582 abort ();
5585 case 'b':
5586 case 'w':
5587 case 'k':
5588 case 'q':
5589 case 'h':
5590 case 'y':
5591 case 'X':
5592 case 'P':
5593 break;
5595 case 's':
5596 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5598 PRINT_OPERAND (file, x, 0);
5599 putc (',', file);
5601 return;
5603 case 'D':
5604 /* A little bit of brain damage here. The SSE compare instructions
5605 use completely different names for the comparisons than the
5606 fp conditional moves. */
5607 switch (GET_CODE (x))
5609 case EQ:
5610 case UNEQ:
5611 fputs ("eq", file);
5612 break;
5613 case LT:
5614 case UNLT:
5615 fputs ("lt", file);
5616 break;
5617 case LE:
5618 case UNLE:
5619 fputs ("le", file);
5620 break;
5621 case UNORDERED:
5622 fputs ("unord", file);
5623 break;
5624 case NE:
5625 case LTGT:
5626 fputs ("neq", file);
5627 break;
5628 case UNGE:
5629 case GE:
5630 fputs ("nlt", file);
5631 break;
5632 case UNGT:
5633 case GT:
5634 fputs ("nle", file);
5635 break;
5636 case ORDERED:
5637 fputs ("ord", file);
5638 break;
5639 default:
5640 abort ();
5641 break;
5643 return;
5644 case 'C':
5645 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
5646 return;
5647 case 'F':
5648 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
5649 return;
5651 /* Like above, but reverse condition */
5652 case 'c':
5653 /* Check to see if argument to %c is really a constant
5654 and not a condition code which needs to be reversed. */
5655 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
5657 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
5658 return;
5660 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5661 return;
5662 case 'f':
5663 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
5664 return;
5665 case '+':
5667 rtx x;
5669 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5670 return;
5672 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5673 if (x)
5675 int pred_val = INTVAL (XEXP (x, 0));
5677 if (pred_val < REG_BR_PROB_BASE * 45 / 100
5678 || pred_val > REG_BR_PROB_BASE * 55 / 100)
5680 int taken = pred_val > REG_BR_PROB_BASE / 2;
5681 int cputaken = final_forward_branch_p (current_output_insn) == 0;
5683 /* Emit hints only in the case the default branch prediction
5684 heuristics would fail. */
5685 if (taken != cputaken)
5687 /* We use 3e (DS) prefix for taken branches and
5688 2e (CS) prefix for not taken branches. */
5689 if (taken)
5690 fputs ("ds ; ", file);
5691 else
5692 fputs ("cs ; ", file);
5696 return;
5698 default:
5700 char str[50];
5701 sprintf (str, "invalid operand code `%c'", code);
5702 output_operand_lossage (str);
5707 if (GET_CODE (x) == REG)
5709 PRINT_REG (x, code, file);
5712 else if (GET_CODE (x) == MEM)
5714 /* No `byte ptr' prefix for call instructions. */
5715 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
5717 const char * size;
5718 switch (GET_MODE_SIZE (GET_MODE (x)))
5720 case 1: size = "BYTE"; break;
5721 case 2: size = "WORD"; break;
5722 case 4: size = "DWORD"; break;
5723 case 8: size = "QWORD"; break;
5724 case 12: size = "XWORD"; break;
5725 case 16: size = "XMMWORD"; break;
5726 default:
5727 abort ();
5730 /* Check for explicit size override (codes 'b', 'w' and 'k') */
5731 if (code == 'b')
5732 size = "BYTE";
5733 else if (code == 'w')
5734 size = "WORD";
5735 else if (code == 'k')
5736 size = "DWORD";
5738 fputs (size, file);
5739 fputs (" PTR ", file);
5742 x = XEXP (x, 0);
5743 if (flag_pic && CONSTANT_ADDRESS_P (x))
5744 output_pic_addr_const (file, x, code);
5745 /* Avoid (%rip) for call operands. */
5746 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
5747 && GET_CODE (x) != CONST_INT)
5748 output_addr_const (file, x);
5749 else
5750 output_address (x);
5753 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
5755 REAL_VALUE_TYPE r;
5756 long l;
5758 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5759 REAL_VALUE_TO_TARGET_SINGLE (r, l);
5761 if (ASSEMBLER_DIALECT == 0)
5762 putc ('$', file);
5763 fprintf (file, "0x%lx", l);
5766 /* These float cases don't actually occur as immediate operands. */
5767 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5769 REAL_VALUE_TYPE r;
5770 char dstr[30];
5772 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5773 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5774 fprintf (file, "%s", dstr);
5777 else if (GET_CODE (x) == CONST_DOUBLE
5778 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
5780 REAL_VALUE_TYPE r;
5781 char dstr[30];
5783 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5784 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5785 fprintf (file, "%s", dstr);
5787 else
5789 if (code != 'P')
5791 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
5793 if (ASSEMBLER_DIALECT == 0)
5794 putc ('$', file);
5796 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
5797 || GET_CODE (x) == LABEL_REF)
5799 if (ASSEMBLER_DIALECT == 0)
5800 putc ('$', file);
5801 else
5802 fputs ("OFFSET FLAT:", file);
5805 if (GET_CODE (x) == CONST_INT)
5806 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5807 else if (flag_pic)
5808 output_pic_addr_const (file, x, code);
5809 else
5810 output_addr_const (file, x);
5814 /* Print a memory operand whose address is ADDR. */
5816 void
5817 print_operand_address (file, addr)
5818 FILE *file;
5819 register rtx addr;
5821 struct ix86_address parts;
5822 rtx base, index, disp;
5823 int scale;
5825 if (! ix86_decompose_address (addr, &parts))
5826 abort ();
5828 base = parts.base;
5829 index = parts.index;
5830 disp = parts.disp;
5831 scale = parts.scale;
5833 if (!base && !index)
5835 /* A displacement-only address requires special attention. */
5837 if (GET_CODE (disp) == CONST_INT)
5839 if (ASSEMBLER_DIALECT != 0)
5841 if (USER_LABEL_PREFIX[0] == 0)
5842 putc ('%', file);
5843 fputs ("ds:", file);
5845 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
5847 else if (flag_pic)
5848 output_pic_addr_const (file, addr, 0);
5849 else
5850 output_addr_const (file, addr);
5852 /* Use the one byte shorter RIP-relative addressing for 64bit mode. */
5853 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
5854 fputs ("(%rip)", file);
5856 else
5858 if (ASSEMBLER_DIALECT == 0)
5860 if (disp)
5862 if (flag_pic)
5863 output_pic_addr_const (file, disp, 0);
5864 else if (GET_CODE (disp) == LABEL_REF)
5865 output_asm_label (disp);
5866 else
5867 output_addr_const (file, disp);
5870 putc ('(', file);
5871 if (base)
5872 PRINT_REG (base, 0, file);
5873 if (index)
5875 putc (',', file);
5876 PRINT_REG (index, 0, file);
5877 if (scale != 1)
5878 fprintf (file, ",%d", scale);
5880 putc (')', file);
5882 else
5884 rtx offset = NULL_RTX;
5886 if (disp)
5888 /* Pull out the offset of a symbol; print any symbol itself. */
5889 if (GET_CODE (disp) == CONST
5890 && GET_CODE (XEXP (disp, 0)) == PLUS
5891 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
5893 offset = XEXP (XEXP (disp, 0), 1);
5894 disp = gen_rtx_CONST (VOIDmode,
5895 XEXP (XEXP (disp, 0), 0));
5898 if (flag_pic)
5899 output_pic_addr_const (file, disp, 0);
5900 else if (GET_CODE (disp) == LABEL_REF)
5901 output_asm_label (disp);
5902 else if (GET_CODE (disp) == CONST_INT)
5903 offset = disp;
5904 else
5905 output_addr_const (file, disp);
5908 putc ('[', file);
5909 if (base)
5911 PRINT_REG (base, 0, file);
5912 if (offset)
5914 if (INTVAL (offset) >= 0)
5915 putc ('+', file);
5916 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5919 else if (offset)
5920 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5921 else
5922 putc ('0', file);
5924 if (index)
5926 putc ('+', file);
5927 PRINT_REG (index, 0, file);
5928 if (scale != 1)
5929 fprintf (file, "*%d", scale);
5931 putc (']', file);
5936 /* Split one or more DImode RTL references into pairs of SImode
5937 references. The RTL can be REG, offsettable MEM, integer constant, or
5938 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
5939 split and "num" is its length. lo_half and hi_half are output arrays
5940 that parallel "operands". */
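/* E.g., as a sketch: a DImode memory operand (mem:DI (reg %eax))
   splits into (mem:SI (reg %eax)) and
   (mem:SI (plus (reg %eax) (const_int 4))), while a DImode hard
   register splits into the corresponding pair of SImode subwords.  */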
5942 void
5943 split_di (operands, num, lo_half, hi_half)
5944 rtx operands[];
5945 int num;
5946 rtx lo_half[], hi_half[];
5948 while (num--)
5950 rtx op = operands[num];
5952 /* simplify_subreg refuses to split volatile memory addresses,
5953 but we still have to handle them. */
5954 if (GET_CODE (op) == MEM)
5956 lo_half[num] = adjust_address (op, SImode, 0);
5957 hi_half[num] = adjust_address (op, SImode, 4);
5959 else
5961 lo_half[num] = simplify_gen_subreg (SImode, op,
5962 GET_MODE (op) == VOIDmode
5963 ? DImode : GET_MODE (op), 0);
5964 hi_half[num] = simplify_gen_subreg (SImode, op,
5965 GET_MODE (op) == VOIDmode
5966 ? DImode : GET_MODE (op), 4);
5970 /* Split one or more TImode RTL references into pairs of DImode
5971 references. The RTL can be REG, offsettable MEM, integer constant, or
5972 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
5973 split and "num" is its length. lo_half and hi_half are output arrays
5974 that parallel "operands". */
5976 void
5977 split_ti (operands, num, lo_half, hi_half)
5978 rtx operands[];
5979 int num;
5980 rtx lo_half[], hi_half[];
5982 while (num--)
5984 rtx op = operands[num];
/* simplify_subreg refuses to split volatile memory addresses, but we
still have to handle them. */
5988 if (GET_CODE (op) == MEM)
5990 lo_half[num] = adjust_address (op, DImode, 0);
5991 hi_half[num] = adjust_address (op, DImode, 8);
5993 else
5995 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
5996 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6001 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6002 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6003 is the expression of the binary operation. The output may either be
6004 emitted here, or returned to the caller, like all output_* functions.
6006 There is no guarantee that the operands are the same mode, as they
6007 might be within FLOAT or FLOAT_EXTEND expressions. */
6009 #ifndef SYSV386_COMPAT
6010 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6011 wants to fix the assemblers because that causes incompatibility
6012 with gcc. No-one wants to fix gcc because that causes
6013 incompatibility with assemblers... You can use the option of
6014 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6015 #define SYSV386_COMPAT 1
6016 #endif
6018 const char *
6019 output_387_binary_op (insn, operands)
6020 rtx insn;
6021 rtx *operands;
6023 static char buf[30];
6024 const char *p;
6025 const char *ssep;
6026 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6028 #ifdef ENABLE_CHECKING
/* Even if we do not want to check the inputs, this documents the input
constraints, which helps in understanding the following code. */
6031 if (STACK_REG_P (operands[0])
6032 && ((REG_P (operands[1])
6033 && REGNO (operands[0]) == REGNO (operands[1])
6034 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6035 || (REG_P (operands[2])
6036 && REGNO (operands[0]) == REGNO (operands[2])
6037 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6038 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6039 ; /* ok */
6040 else if (!is_sse)
6041 abort ();
6042 #endif
6044 switch (GET_CODE (operands[3]))
6046 case PLUS:
6047 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6048 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6049 p = "fiadd";
6050 else
6051 p = "fadd";
6052 ssep = "add";
6053 break;
6055 case MINUS:
6056 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6057 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6058 p = "fisub";
6059 else
6060 p = "fsub";
6061 ssep = "sub";
6062 break;
6064 case MULT:
6065 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6066 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6067 p = "fimul";
6068 else
6069 p = "fmul";
6070 ssep = "mul";
6071 break;
6073 case DIV:
6074 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6075 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6076 p = "fidiv";
6077 else
6078 p = "fdiv";
6079 ssep = "div";
6080 break;
6082 default:
6083 abort ();
6086 if (is_sse)
6088 strcpy (buf, ssep);
6089 if (GET_MODE (operands[0]) == SFmode)
6090 strcat (buf, "ss\t{%2, %0|%0, %2}");
6091 else
6092 strcat (buf, "sd\t{%2, %0|%0, %2}");
6093 return buf;
6095 strcpy (buf, p);
6097 switch (GET_CODE (operands[3]))
6099 case MULT:
6100 case PLUS:
6101 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6103 rtx temp = operands[2];
6104 operands[2] = operands[1];
6105 operands[1] = temp;
/* We now know that operands[0] == operands[1]. */
6110 if (GET_CODE (operands[2]) == MEM)
6112 p = "%z2\t%2";
6113 break;
6116 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6118 if (STACK_TOP_P (operands[0]))
6119 /* How is it that we are storing to a dead operand[2]?
6120 Well, presumably operands[1] is dead too. We can't
6121 store the result to st(0) as st(0) gets popped on this
6122 instruction. Instead store to operands[2] (which I
6123 think has to be st(1)). st(1) will be popped later.
6124 gcc <= 2.8.1 didn't have this check and generated
6125 assembly code that the Unixware assembler rejected. */
6126 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6127 else
6128 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6129 break;
6132 if (STACK_TOP_P (operands[0]))
6133 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6134 else
6135 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6136 break;
6138 case MINUS:
6139 case DIV:
6140 if (GET_CODE (operands[1]) == MEM)
6142 p = "r%z1\t%1";
6143 break;
6146 if (GET_CODE (operands[2]) == MEM)
6148 p = "%z2\t%2";
6149 break;
6152 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6154 #if SYSV386_COMPAT
6155 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6156 derived assemblers, confusingly reverse the direction of
6157 the operation for fsub{r} and fdiv{r} when the
6158 destination register is not st(0). The Intel assembler
6159 doesn't have this brain damage. Read !SYSV386_COMPAT to
6160 figure out what the hardware really does. */
6161 if (STACK_TOP_P (operands[0]))
6162 p = "{p\t%0, %2|rp\t%2, %0}";
6163 else
6164 p = "{rp\t%2, %0|p\t%0, %2}";
6165 #else
6166 if (STACK_TOP_P (operands[0]))
6167 /* As above for fmul/fadd, we can't store to st(0). */
6168 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6169 else
6170 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6171 #endif
6172 break;
6175 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6177 #if SYSV386_COMPAT
6178 if (STACK_TOP_P (operands[0]))
6179 p = "{rp\t%0, %1|p\t%1, %0}";
6180 else
6181 p = "{p\t%1, %0|rp\t%0, %1}";
6182 #else
6183 if (STACK_TOP_P (operands[0]))
6184 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6185 else
6186 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6187 #endif
6188 break;
6191 if (STACK_TOP_P (operands[0]))
6193 if (STACK_TOP_P (operands[1]))
6194 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6195 else
6196 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
6197 break;
6199 else if (STACK_TOP_P (operands[1]))
6201 #if SYSV386_COMPAT
6202 p = "{\t%1, %0|r\t%0, %1}";
6203 #else
6204 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
6205 #endif
6207 else
6209 #if SYSV386_COMPAT
6210 p = "{r\t%2, %0|\t%0, %2}";
6211 #else
6212 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6213 #endif
6215 break;
6217 default:
6218 abort ();
6221 strcat (buf, p);
6222 return buf;
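/* Example (for illustration only): for
(set (reg:SF st0) (plus:SF (reg:SF st0) (mem:SF ...)))
the code above builds the template "fadd%z2\t%2"; assuming the %z
operand code prints the memory-operand size suffix, this assembles
as "fadds" for an SFmode memory operand and "faddl" for a DFmode one. */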
/* Output code to initialize the control word copies used by the
trunc?f?i patterns. NORMAL is set to the current control word, while
ROUND_DOWN is set to a control word that rounds toward zero (truncates). */
6228 void
6229 emit_i387_cw_initialization (normal, round_down)
6230 rtx normal, round_down;
6232 rtx reg = gen_reg_rtx (HImode);
6234 emit_insn (gen_x86_fnstcw_1 (normal));
6235 emit_move_insn (reg, normal);
6236 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6237 && !TARGET_64BIT)
6238 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6239 else
6240 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
6241 emit_move_insn (round_down, reg);
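/* For reference: bits 10 and 11 of the 387 control word form the
rounding-control field (00 = to nearest, 01 = down, 10 = up,
11 = toward zero), so or-ing in 0xc00 above selects truncation, which
is what C float-to-integer conversion requires. The insv variant
stores 0x0c into the high byte of the low word instead, setting the
same two bits (it also clears the precision-control bits, which the
truncation patterns do not rely on). */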
6244 /* Output code for INSN to convert a float to a signed int. OPERANDS
6245 are the insn operands. The output may be [HSD]Imode and the input
6246 operand may be [SDX]Fmode. */
6248 const char *
6249 output_fix_trunc (insn, operands)
6250 rtx insn;
6251 rtx *operands;
6253 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6254 int dimode_p = GET_MODE (operands[0]) == DImode;
6256 /* Jump through a hoop or two for DImode, since the hardware has no
6257 non-popping instruction. We used to do this a different way, but
6258 that was somewhat fragile and broke with post-reload splitters. */
6259 if (dimode_p && !stack_top_dies)
6260 output_asm_insn ("fld\t%y1", operands);
6262 if (!STACK_TOP_P (operands[1]))
6263 abort ();
6265 if (GET_CODE (operands[0]) != MEM)
6266 abort ();
6268 output_asm_insn ("fldcw\t%3", operands);
6269 if (stack_top_dies || dimode_p)
6270 output_asm_insn ("fistp%z0\t%0", operands);
6271 else
6272 output_asm_insn ("fist%z0\t%0", operands);
6273 output_asm_insn ("fldcw\t%2", operands);
6275 return "";
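/* Example (for illustration only; round_down_cw and normal_cw stand
for operands 3 and 2): for a DImode store where the value in st(0)
is still live, the code above emits

fld %st(0)
fldcw round_down_cw
fistpll mem
fldcw normal_cw

duplicating the value because fistpll (fistpq with GAS_MNEMONICS)
always pops, and switching the control word to truncation around the
store. */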
6278 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6279 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6280 when fucom should be used. */
6282 const char *
6283 output_fp_compare (insn, operands, eflags_p, unordered_p)
6284 rtx insn;
6285 rtx *operands;
6286 int eflags_p, unordered_p;
6288 int stack_top_dies;
6289 rtx cmp_op0 = operands[0];
6290 rtx cmp_op1 = operands[1];
6291 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
6293 if (eflags_p == 2)
6295 cmp_op0 = cmp_op1;
6296 cmp_op1 = operands[2];
6298 if (is_sse)
6300 if (GET_MODE (operands[0]) == SFmode)
6301 if (unordered_p)
6302 return "ucomiss\t{%1, %0|%0, %1}";
6303 else
6304 return "comiss\t{%1, %0|%0, %y}";
6305 else
6306 if (unordered_p)
6307 return "ucomisd\t{%1, %0|%0, %1}";
6308 else
6309 return "comisd\t{%1, %0|%0, %y}";
6312 if (! STACK_TOP_P (cmp_op0))
6313 abort ();
6315 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6317 if (STACK_REG_P (cmp_op1)
6318 && stack_top_dies
6319 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6320 && REGNO (cmp_op1) != FIRST_STACK_REG)
/* If the top of the 387 stack dies, and the other operand is
also a stack register that dies, then this must be an
`fcompp' float compare. */
6326 if (eflags_p == 1)
6328 /* There is no double popping fcomi variant. Fortunately,
6329 eflags is immune from the fstp's cc clobbering. */
6330 if (unordered_p)
6331 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6332 else
6333 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6334 return "fstp\t%y0";
6336 else
6338 if (eflags_p == 2)
6340 if (unordered_p)
6341 return "fucompp\n\tfnstsw\t%0";
6342 else
6343 return "fcompp\n\tfnstsw\t%0";
6345 else
6347 if (unordered_p)
6348 return "fucompp";
6349 else
6350 return "fcompp";
6354 else
/* Encoded here as eflags_p << 3 | intmode << 2 | unordered_p << 1
| stack_top_dies. */
6358 static const char * const alt[24] =
6360 "fcom%z1\t%y1",
6361 "fcomp%z1\t%y1",
6362 "fucom%z1\t%y1",
6363 "fucomp%z1\t%y1",
6365 "ficom%z1\t%y1",
6366 "ficomp%z1\t%y1",
6367 NULL,
6368 NULL,
6370 "fcomi\t{%y1, %0|%0, %y1}",
6371 "fcomip\t{%y1, %0|%0, %y1}",
6372 "fucomi\t{%y1, %0|%0, %y1}",
6373 "fucomip\t{%y1, %0|%0, %y1}",
6375 NULL,
6376 NULL,
6377 NULL,
6378 NULL,
6380 "fcom%z2\t%y2\n\tfnstsw\t%0",
6381 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6382 "fucom%z2\t%y2\n\tfnstsw\t%0",
6383 "fucomp%z2\t%y2\n\tfnstsw\t%0",
6385 "ficom%z2\t%y2\n\tfnstsw\t%0",
6386 "ficomp%z2\t%y2\n\tfnstsw\t%0",
6387 NULL,
6388 NULL
6391 int mask;
6392 const char *ret;
6394 mask = eflags_p << 3;
6395 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6396 mask |= unordered_p << 1;
6397 mask |= stack_top_dies;
6399 if (mask >= 24)
6400 abort ();
6401 ret = alt[mask];
6402 if (ret == NULL)
6403 abort ();
6405 return ret;
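/* Example (for illustration only): for an fcomi-style compare
(eflags_p == 1) of two FP registers where the comparison is unordered
and the top of stack dies, the mask above is (1 << 3) | (1 << 1) | 1
== 11, selecting "fucomip\t{%y1, %0|%0, %y1}" from the table. */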
6409 /* Output assembler code to FILE to initialize basic-block profiling.
6411 If profile_block_flag == 2
6413 Output code to call the subroutine `__bb_init_trace_func'
6414 and pass two parameters to it. The first parameter is
6415 the address of a block allocated in the object module.
6416 The second parameter is the number of the first basic block
6417 of the function.
6419 The name of the block is a local symbol made with this statement:
6421 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
6423 Of course, since you are writing the definition of
6424 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
6425 can take a short cut in the definition of this macro and use the
6426 name that you know will result.
6428 The number of the first basic block of the function is
6429 passed to the macro in BLOCK_OR_LABEL.
6431 If described in a virtual assembler language the code to be
6432 output looks like:
6434 parameter1 <- LPBX0
6435 parameter2 <- BLOCK_OR_LABEL
6436 call __bb_init_trace_func
6438 else if profile_block_flag != 0
6440 Output code to call the subroutine `__bb_init_func'
6441 and pass one single parameter to it, which is the same
6442 as the first parameter to `__bb_init_trace_func'.
6444 The first word of this parameter is a flag which will be nonzero if
6445 the object module has already been initialized. So test this word
6446 first, and do not call `__bb_init_func' if the flag is nonzero.
6447 Note: When profile_block_flag == 2 the test need not be done
6448 but `__bb_init_trace_func' *must* be called.
6450 BLOCK_OR_LABEL may be used to generate a label number as a
6451 branch destination in case `__bb_init_func' will not be called.
6453 If described in a virtual assembler language the code to be
6454 output looks like:
6456 cmp (LPBX0),0
6457 jne local_label
6458 parameter1 <- LPBX0
6459 call __bb_init_func
6460 local_label:
6463 void
6464 ix86_output_function_block_profiler (file, block_or_label)
6465 FILE *file;
6466 int block_or_label;
6468 static int num_func = 0;
6469 rtx xops[8];
6470 char block_table[80], false_label[80];
6472 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
6474 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
6475 xops[5] = stack_pointer_rtx;
6476 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
6478 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
6480 switch (profile_block_flag)
6482 case 2:
6483 xops[2] = GEN_INT (block_or_label);
6484 xops[3] = gen_rtx_MEM (Pmode,
6485 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
6486 xops[6] = GEN_INT (8);
6488 output_asm_insn ("push{l}\t%2", xops);
6489 if (!flag_pic)
6490 output_asm_insn ("push{l}\t%1", xops);
6491 else
6493 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
6494 output_asm_insn ("push{l}\t%7", xops);
6496 output_asm_insn ("call\t%P3", xops);
6497 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
6498 break;
6500 default:
6501 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
6503 xops[0] = const0_rtx;
6504 xops[2] = gen_rtx_MEM (Pmode,
6505 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
6506 xops[3] = gen_rtx_MEM (Pmode,
6507 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
6508 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
6509 xops[6] = GEN_INT (4);
6511 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
6513 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
6514 output_asm_insn ("jne\t%2", xops);
6516 if (!flag_pic)
6517 output_asm_insn ("push{l}\t%1", xops);
6518 else
6520 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
6521 output_asm_insn ("push{l}\t%7", xops);
6523 output_asm_insn ("call\t%P3", xops);
6524 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
6525 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
6526 num_func++;
6527 break;
6531 /* Output assembler code to FILE to increment a counter associated
6532 with basic block number BLOCKNO.
6534 If profile_block_flag == 2
6536 Output code to initialize the global structure `__bb' and
6537 call the function `__bb_trace_func' which will increment the
6538 counter.
6540 `__bb' consists of two words. In the first word the number
6541 of the basic block has to be stored. In the second word
6542 the address of a block allocated in the object module
6543 has to be stored.
6545 The basic block number is given by BLOCKNO.
6547 The address of the block is given by the label created with
6549 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
6551 by FUNCTION_BLOCK_PROFILER.
6553 Of course, since you are writing the definition of
6554 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
6555 can take a short cut in the definition of this macro and use the
6556 name that you know will result.
6558 If described in a virtual assembler language the code to be
6559 output looks like:
6561 move BLOCKNO -> (__bb)
6562 move LPBX0 -> (__bb+4)
6563 call __bb_trace_func
Note that function `__bb_trace_func' must not change the
machine state, especially the flag register. To ensure
this, you must output code to save and restore registers
6568 either in this macro or in the macros MACHINE_STATE_SAVE
6569 and MACHINE_STATE_RESTORE. The last two macros will be
6570 used in the function `__bb_trace_func', so you must make
6571 sure that the function prologue does not change any
6572 register prior to saving it with MACHINE_STATE_SAVE.
6574 else if profile_block_flag != 0
6576 Output code to increment the counter directly.
6577 Basic blocks are numbered separately from zero within each
6578 compiled object module. The count associated with block number
6579 BLOCKNO is at index BLOCKNO in an array of words; the name of
6580 this array is a local symbol made with this statement:
6582 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
6584 Of course, since you are writing the definition of
6585 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
6586 can take a short cut in the definition of this macro and use the
6587 name that you know will result.
6589 If described in a virtual assembler language the code to be
6590 output looks like:
6592 inc (LPBX2+4*BLOCKNO)
6595 void
6596 ix86_output_block_profiler (file, blockno)
6597 FILE *file ATTRIBUTE_UNUSED;
6598 int blockno;
6600 rtx xops[8], cnt_rtx;
6601 char counts[80];
6602 char *block_table = counts;
6604 switch (profile_block_flag)
6606 case 2:
6607 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
6609 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
6610 xops[2] = GEN_INT (blockno);
6611 xops[3] = gen_rtx_MEM (Pmode,
6612 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
6613 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
6614 xops[5] = plus_constant (xops[4], 4);
6615 xops[0] = gen_rtx_MEM (SImode, xops[4]);
6616 xops[6] = gen_rtx_MEM (SImode, xops[5]);
6618 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
6620 output_asm_insn ("pushf", xops);
6621 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6622 if (flag_pic)
6624 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
6625 output_asm_insn ("push{l}\t%7", xops);
6626 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
6627 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
6628 output_asm_insn ("pop{l}\t%7", xops);
6630 else
6631 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
6632 output_asm_insn ("call\t%P3", xops);
6633 output_asm_insn ("popf", xops);
6635 break;
6637 default:
6638 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
6639 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
6640 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
6642 if (blockno)
6643 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
6645 if (flag_pic)
6646 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
6648 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
6649 output_asm_insn ("inc{l}\t%0", xops);
6651 break;
6655 void
6656 ix86_output_addr_vec_elt (file, value)
6657 FILE *file;
6658 int value;
6660 const char *directive = ASM_LONG;
6662 if (TARGET_64BIT)
6664 #ifdef ASM_QUAD
6665 directive = ASM_QUAD;
6666 #else
6667 abort ();
6668 #endif
6671 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
6674 void
6675 ix86_output_addr_diff_elt (file, value, rel)
6676 FILE *file;
6677 int value, rel;
6679 if (TARGET_64BIT)
6680 fprintf (file, "%s%s%d-.+4+(.-%s%d)\n",
6681 ASM_LONG, LPREFIX, value, LPREFIX, rel);
6682 else if (HAVE_AS_GOTOFF_IN_DATA)
6683 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
6684 else
6685 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
6686 ASM_LONG, LPREFIX, value);
6689 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
6690 for the target. */
6692 void
6693 ix86_expand_clear (dest)
6694 rtx dest;
6696 rtx tmp;
6698 /* We play register width games, which are only valid after reload. */
6699 if (!reload_completed)
6700 abort ();
6702 /* Avoid HImode and its attendant prefix byte. */
6703 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
6704 dest = gen_rtx_REG (SImode, REGNO (dest));
6706 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
6708 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
6709 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
6711 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
6712 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6715 emit_insn (tmp);
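/* For reference: "xor %eax, %eax" is a two byte encoding while
"mov $0, %eax" takes five bytes, but the xor form clobbers the
condition codes, hence the explicit CLOBBER of the flags register
added to the parallel above. */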
6718 void
6719 ix86_expand_move (mode, operands)
6720 enum machine_mode mode;
6721 rtx operands[];
6723 int strict = (reload_in_progress || reload_completed);
6724 rtx insn;
6726 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
6728 /* Emit insns to move operands[1] into operands[0]. */
6730 if (GET_CODE (operands[0]) == MEM)
6731 operands[1] = force_reg (Pmode, operands[1]);
6732 else
6734 rtx temp = operands[0];
6735 if (GET_CODE (temp) != REG)
6736 temp = gen_reg_rtx (Pmode);
6737 temp = legitimize_pic_address (operands[1], temp);
6738 if (temp == operands[0])
6739 return;
6740 operands[1] = temp;
6743 else
6745 if (GET_CODE (operands[0]) == MEM
6746 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
6747 || !push_operand (operands[0], mode))
6748 && GET_CODE (operands[1]) == MEM)
6749 operands[1] = force_reg (mode, operands[1]);
6751 if (push_operand (operands[0], mode)
6752 && ! general_no_elim_operand (operands[1], mode))
6753 operands[1] = copy_to_mode_reg (mode, operands[1]);
6755 /* Force large constants in 64bit compilation into register
6756 to get them CSEed. */
6757 if (TARGET_64BIT && mode == DImode
6758 && immediate_operand (operands[1], mode)
6759 && !x86_64_zero_extended_value (operands[1])
6760 && !register_operand (operands[0], mode)
6761 && optimize && !reload_completed && !reload_in_progress)
6762 operands[1] = copy_to_mode_reg (mode, operands[1]);
6764 if (FLOAT_MODE_P (mode))
6766 /* If we are loading a floating point constant to a register,
6767 force the value to memory now, since we'll get better code
6768 out the back end. */
6770 if (strict)
6772 else if (GET_CODE (operands[1]) == CONST_DOUBLE
6773 && register_operand (operands[0], mode))
6774 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
6778 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
6780 emit_insn (insn);
/* Attempt to expand a binary operator. Make the expansion closer to the
actual machine, rather than just general_operand, which will allow 3 separate
memory references (one output, two input) in a single insn. */
6787 void
6788 ix86_expand_binary_operator (code, mode, operands)
6789 enum rtx_code code;
6790 enum machine_mode mode;
6791 rtx operands[];
6793 int matching_memory;
6794 rtx src1, src2, dst, op, clob;
6796 dst = operands[0];
6797 src1 = operands[1];
6798 src2 = operands[2];
6800 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6801 if (GET_RTX_CLASS (code) == 'c'
6802 && (rtx_equal_p (dst, src2)
6803 || immediate_operand (src1, mode)))
6805 rtx temp = src1;
6806 src1 = src2;
6807 src2 = temp;
6810 /* If the destination is memory, and we do not have matching source
6811 operands, do things in registers. */
6812 matching_memory = 0;
6813 if (GET_CODE (dst) == MEM)
6815 if (rtx_equal_p (dst, src1))
6816 matching_memory = 1;
6817 else if (GET_RTX_CLASS (code) == 'c'
6818 && rtx_equal_p (dst, src2))
6819 matching_memory = 2;
6820 else
6821 dst = gen_reg_rtx (mode);
6824 /* Both source operands cannot be in memory. */
6825 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
6827 if (matching_memory != 2)
6828 src2 = force_reg (mode, src2);
6829 else
6830 src1 = force_reg (mode, src1);
/* If the operation is not commutative, source 1 cannot be a constant
or non-matching memory. */
6835 if ((CONSTANT_P (src1)
6836 || (!matching_memory && GET_CODE (src1) == MEM))
6837 && GET_RTX_CLASS (code) != 'c')
6838 src1 = force_reg (mode, src1);
6840 /* If optimizing, copy to regs to improve CSE */
6841 if (optimize && ! no_new_pseudos)
6843 if (GET_CODE (dst) == MEM)
6844 dst = gen_reg_rtx (mode);
6845 if (GET_CODE (src1) == MEM)
6846 src1 = force_reg (mode, src1);
6847 if (GET_CODE (src2) == MEM)
6848 src2 = force_reg (mode, src2);
6851 /* Emit the instruction. */
6853 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
6854 if (reload_in_progress)
6856 /* Reload doesn't know about the flags register, and doesn't know that
6857 it doesn't want to clobber it. We can only do this with PLUS. */
6858 if (code != PLUS)
6859 abort ();
6860 emit_insn (op);
6862 else
6864 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6865 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6868 /* Fix up the destination if needed. */
6869 if (dst != operands[0])
6870 emit_move_insn (operands[0], dst);
6873 /* Return TRUE or FALSE depending on whether the binary operator meets the
6874 appropriate constraints. */
int
ix86_binary_operator_ok (code, mode, operands)
6878 enum rtx_code code;
6879 enum machine_mode mode ATTRIBUTE_UNUSED;
6880 rtx operands[3];
6882 /* Both source operands cannot be in memory. */
6883 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
6884 return 0;
/* If the operation is not commutative, source 1 cannot be a constant. */
6886 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
6887 return 0;
6888 /* If the destination is memory, we must have a matching source operand. */
6889 if (GET_CODE (operands[0]) == MEM
6890 && ! (rtx_equal_p (operands[0], operands[1])
6891 || (GET_RTX_CLASS (code) == 'c'
6892 && rtx_equal_p (operands[0], operands[2]))))
6893 return 0;
/* If the operation is not commutative and source 1 is memory, we must
have a matching destination. */
6896 if (GET_CODE (operands[1]) == MEM
6897 && GET_RTX_CLASS (code) != 'c'
6898 && ! rtx_equal_p (operands[0], operands[1]))
6899 return 0;
6900 return 1;
/* Attempt to expand a unary operator. Make the expansion closer to the
actual machine, rather than just general_operand, which will allow 2 separate
memory references (one output, one input) in a single insn. */
6907 void
6908 ix86_expand_unary_operator (code, mode, operands)
6909 enum rtx_code code;
6910 enum machine_mode mode;
6911 rtx operands[];
6913 int matching_memory;
6914 rtx src, dst, op, clob;
6916 dst = operands[0];
6917 src = operands[1];
6919 /* If the destination is memory, and we do not have matching source
6920 operands, do things in registers. */
6921 matching_memory = 0;
6922 if (GET_CODE (dst) == MEM)
6924 if (rtx_equal_p (dst, src))
6925 matching_memory = 1;
6926 else
6927 dst = gen_reg_rtx (mode);
6930 /* When source operand is memory, destination must match. */
6931 if (!matching_memory && GET_CODE (src) == MEM)
6932 src = force_reg (mode, src);
6934 /* If optimizing, copy to regs to improve CSE */
6935 if (optimize && ! no_new_pseudos)
6937 if (GET_CODE (dst) == MEM)
6938 dst = gen_reg_rtx (mode);
6939 if (GET_CODE (src) == MEM)
6940 src = force_reg (mode, src);
6943 /* Emit the instruction. */
6945 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
6946 if (reload_in_progress || code == NOT)
6948 /* Reload doesn't know about the flags register, and doesn't know that
6949 it doesn't want to clobber it. */
6950 if (code != NOT)
6951 abort ();
6952 emit_insn (op);
6954 else
6956 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6957 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6960 /* Fix up the destination if needed. */
6961 if (dst != operands[0])
6962 emit_move_insn (operands[0], dst);
6965 /* Return TRUE or FALSE depending on whether the unary operator meets the
6966 appropriate constraints. */
int
ix86_unary_operator_ok (code, mode, operands)
6970 enum rtx_code code ATTRIBUTE_UNUSED;
6971 enum machine_mode mode ATTRIBUTE_UNUSED;
6972 rtx operands[2] ATTRIBUTE_UNUSED;
/* If one of the operands is in memory, source and destination must match. */
6975 if ((GET_CODE (operands[0]) == MEM
6976 || GET_CODE (operands[1]) == MEM)
6977 && ! rtx_equal_p (operands[0], operands[1]))
6978 return FALSE;
6979 return TRUE;
6982 /* Return TRUE or FALSE depending on whether the first SET in INSN
6983 has source and destination with matching CC modes, and that the
6984 CC mode is at least as constrained as REQ_MODE. */
int
ix86_match_ccmode (insn, req_mode)
6988 rtx insn;
6989 enum machine_mode req_mode;
6991 rtx set;
6992 enum machine_mode set_mode;
6994 set = PATTERN (insn);
6995 if (GET_CODE (set) == PARALLEL)
6996 set = XVECEXP (set, 0, 0);
6997 if (GET_CODE (set) != SET)
6998 abort ();
6999 if (GET_CODE (SET_SRC (set)) != COMPARE)
7000 abort ();
7002 set_mode = GET_MODE (SET_DEST (set));
7003 switch (set_mode)
7005 case CCNOmode:
7006 if (req_mode != CCNOmode
7007 && (req_mode != CCmode
7008 || XEXP (SET_SRC (set), 1) != const0_rtx))
7009 return 0;
7010 break;
7011 case CCmode:
7012 if (req_mode == CCGCmode)
7013 return 0;
7014 /* FALLTHRU */
7015 case CCGCmode:
7016 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7017 return 0;
7018 /* FALLTHRU */
7019 case CCGOCmode:
7020 if (req_mode == CCZmode)
7021 return 0;
7022 /* FALLTHRU */
7023 case CCZmode:
7024 break;
7026 default:
7027 abort ();
7030 return (GET_MODE (SET_SRC (set)) == set_mode);
7033 /* Generate insn patterns to do an integer compare of OPERANDS. */
7035 static rtx
7036 ix86_expand_int_compare (code, op0, op1)
7037 enum rtx_code code;
7038 rtx op0, op1;
7040 enum machine_mode cmpmode;
7041 rtx tmp, flags;
7043 cmpmode = SELECT_CC_MODE (code, op0, op1);
7044 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7046 /* This is very simple, but making the interface the same as in the
7047 FP case makes the rest of the code easier. */
7048 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7049 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7051 /* Return the test that should be put into the flags user, i.e.
7052 the bcc, scc, or cmov instruction. */
7053 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7056 /* Figure out whether to use ordered or unordered fp comparisons.
7057 Return the appropriate mode to use. */
7059 enum machine_mode
7060 ix86_fp_compare_mode (code)
7061 enum rtx_code code ATTRIBUTE_UNUSED;
/* ??? In order to make all comparisons reversible, we do all comparisons
non-trapping when compiling for IEEE. Once gcc is able to distinguish
all forms of trapping and nontrapping comparisons, we can make inequality
comparisons trapping again, since it results in better code when using
FCOM based compares. */
7068 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7071 enum machine_mode
7072 ix86_cc_mode (code, op0, op1)
7073 enum rtx_code code;
7074 rtx op0, op1;
7076 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7077 return ix86_fp_compare_mode (code);
7078 switch (code)
7080 /* Only zero flag is needed. */
7081 case EQ: /* ZF=0 */
7082 case NE: /* ZF!=0 */
7083 return CCZmode;
7084 /* Codes needing carry flag. */
7085 case GEU: /* CF=0 */
7086 case GTU: /* CF=0 & ZF=0 */
7087 case LTU: /* CF=1 */
7088 case LEU: /* CF=1 | ZF=1 */
7089 return CCmode;
7090 /* Codes possibly doable only with sign flag when
7091 comparing against zero. */
7092 case GE: /* SF=OF or SF=0 */
7093 case LT: /* SF<>OF or SF=1 */
7094 if (op1 == const0_rtx)
7095 return CCGOCmode;
7096 else
7097 /* For other cases Carry flag is not required. */
7098 return CCGCmode;
/* Codes doable only with the sign flag when comparing
against zero, but for which we lack a jump instruction,
so we need to use a relational test against overflow,
which thus needs to be zero. */
7103 case GT: /* ZF=0 & SF=OF */
7104 case LE: /* ZF=1 | SF<>OF */
7105 if (op1 == const0_rtx)
7106 return CCNOmode;
7107 else
7108 return CCGCmode;
/* The strcmp pattern does (use flags), and combine may ask us for a
proper mode. */
7111 case USE:
7112 return CCmode;
7113 default:
7114 abort ();
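/* Examples (for illustration only): (eq a b) only needs ZF and gets
CCZmode; (ltu a b) needs the carry flag and gets CCmode; (lt a 0) can
be tested via the sign flag and gets CCGOCmode; (gt a 0) additionally
needs ZF and gets CCNOmode. */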
7118 /* Return true if we should use an FCOMI instruction for this fp comparison. */
int
ix86_use_fcomi_compare (code)
7122 enum rtx_code code ATTRIBUTE_UNUSED;
7124 enum rtx_code swapped_code = swap_condition (code);
7125 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7126 || (ix86_fp_comparison_cost (swapped_code)
7127 == ix86_fp_comparison_fcomi_cost (swapped_code)));
/* Swap, force into registers, or otherwise massage the two operands
to a fp comparison. The operands are updated in place; the new
comparison code is returned. */
7134 static enum rtx_code
7135 ix86_prepare_fp_compare_args (code, pop0, pop1)
7136 enum rtx_code code;
7137 rtx *pop0, *pop1;
7139 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7140 rtx op0 = *pop0, op1 = *pop1;
7141 enum machine_mode op_mode = GET_MODE (op0);
7142 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7144 /* All of the unordered compare instructions only work on registers.
7145 The same is true of the XFmode compare instructions. The same is
7146 true of the fcomi compare instructions. */
7148 if (!is_sse
7149 && (fpcmp_mode == CCFPUmode
7150 || op_mode == XFmode
7151 || op_mode == TFmode
7152 || ix86_use_fcomi_compare (code)))
7154 op0 = force_reg (op_mode, op0);
7155 op1 = force_reg (op_mode, op1);
7157 else
7159 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7160 things around if they appear profitable, otherwise force op0
7161 into a register. */
7163 if (standard_80387_constant_p (op0) == 0
7164 || (GET_CODE (op0) == MEM
7165 && ! (standard_80387_constant_p (op1) == 0
7166 || GET_CODE (op1) == MEM)))
7168 rtx tmp;
7169 tmp = op0, op0 = op1, op1 = tmp;
7170 code = swap_condition (code);
7173 if (GET_CODE (op0) != REG)
7174 op0 = force_reg (op_mode, op0);
7176 if (CONSTANT_P (op1))
7178 if (standard_80387_constant_p (op1))
7179 op1 = force_reg (op_mode, op1);
7180 else
7181 op1 = validize_mem (force_const_mem (op_mode, op1));
7185 /* Try to rearrange the comparison to make it cheaper. */
7186 if (ix86_fp_comparison_cost (code)
7187 > ix86_fp_comparison_cost (swap_condition (code))
7188 && (GET_CODE (op0) == REG || !reload_completed))
7190 rtx tmp;
7191 tmp = op0, op0 = op1, op1 = tmp;
7192 code = swap_condition (code);
7193 if (GET_CODE (op0) != REG)
7194 op0 = force_reg (op_mode, op0);
7197 *pop0 = op0;
7198 *pop1 = op1;
7199 return code;
7202 /* Convert comparison codes we use to represent FP comparison to integer
7203 code that will result in proper branch. Return UNKNOWN if no such code
7204 is available. */
7205 static enum rtx_code
7206 ix86_fp_compare_code_to_integer (code)
7207 enum rtx_code code;
7209 switch (code)
7211 case GT:
7212 return GTU;
7213 case GE:
7214 return GEU;
7215 case ORDERED:
7216 case UNORDERED:
7217 return code;
7218 break;
7219 case UNEQ:
7220 return EQ;
7221 break;
7222 case UNLT:
7223 return LTU;
7224 break;
7225 case UNLE:
7226 return LEU;
7227 break;
7228 case LTGT:
7229 return NE;
7230 break;
7231 default:
7232 return UNKNOWN;
/* Split comparison code CODE into comparisons we can do using branch
instructions. BYPASS_CODE is the comparison code for the branch that will
branch around FIRST_CODE and SECOND_CODE. If one of the branches is
not required, its value is set to NIL.
We never require more than two branches. */
7241 static void
7242 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7243 enum rtx_code code, *bypass_code, *first_code, *second_code;
7245 *first_code = code;
7246 *bypass_code = NIL;
7247 *second_code = NIL;
7249 /* The fcomi comparison sets flags as follows:
7251 cmp ZF PF CF
7252 > 0 0 0
7253 < 0 0 1
7254 = 1 0 0
7255 un 1 1 1 */
7257 switch (code)
7259 case GT: /* GTU - CF=0 & ZF=0 */
7260 case GE: /* GEU - CF=0 */
7261 case ORDERED: /* PF=0 */
7262 case UNORDERED: /* PF=1 */
7263 case UNEQ: /* EQ - ZF=1 */
7264 case UNLT: /* LTU - CF=1 */
7265 case UNLE: /* LEU - CF=1 | ZF=1 */
7266 case LTGT: /* EQ - ZF=0 */
7267 break;
7268 case LT: /* LTU - CF=1 - fails on unordered */
7269 *first_code = UNLT;
7270 *bypass_code = UNORDERED;
7271 break;
7272 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7273 *first_code = UNLE;
7274 *bypass_code = UNORDERED;
7275 break;
7276 case EQ: /* EQ - ZF=1 - fails on unordered */
7277 *first_code = UNEQ;
7278 *bypass_code = UNORDERED;
7279 break;
7280 case NE: /* NE - ZF=0 - fails on unordered */
7281 *first_code = LTGT;
7282 *second_code = UNORDERED;
7283 break;
7284 case UNGE: /* GEU - CF=0 - fails on unordered */
7285 *first_code = GE;
7286 *second_code = UNORDERED;
7287 break;
7288 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7289 *first_code = GT;
7290 *second_code = UNORDERED;
7291 break;
7292 default:
7293 abort ();
7295 if (!TARGET_IEEE_FP)
7297 *second_code = NIL;
7298 *bypass_code = NIL;
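/* Example (for illustration only): for an IEEE (eq a b) the switch
above returns first_code = UNEQ and bypass_code = UNORDERED: the
caller first branches around the test when PF reports the operands
unordered, and only then tests ZF, since fcom also sets ZF for
unordered operands. */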
/* Return the cost of a comparison done with fcom + arithmetic operations on AX.
All the following functions use the number of instructions as a cost metric.
In the future this should be tweaked to compute bytes for optimize_size and
to take into account the performance of various instructions on various CPUs. */
7306 static int
7307 ix86_fp_comparison_arithmetics_cost (code)
7308 enum rtx_code code;
7310 if (!TARGET_IEEE_FP)
7311 return 4;
7312 /* The cost of code output by ix86_expand_fp_compare. */
7313 switch (code)
7315 case UNLE:
7316 case UNLT:
7317 case LTGT:
7318 case GT:
7319 case GE:
7320 case UNORDERED:
7321 case ORDERED:
7322 case UNEQ:
7323 return 4;
7324 break;
7325 case LT:
7326 case NE:
7327 case EQ:
7328 case UNGE:
7329 return 5;
7330 break;
7331 case LE:
7332 case UNGT:
7333 return 6;
7334 break;
7335 default:
7336 abort ();
7340 /* Return cost of comparison done using fcomi operation.
7341 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7342 static int
7343 ix86_fp_comparison_fcomi_cost (code)
7344 enum rtx_code code;
7346 enum rtx_code bypass_code, first_code, second_code;
/* Return an arbitrarily high cost when the instruction is not supported -
this prevents gcc from using it. */
7349 if (!TARGET_CMOVE)
7350 return 1024;
7351 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7352 return (bypass_code != NIL || second_code != NIL) + 2;
7355 /* Return cost of comparison done using sahf operation.
7356 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7357 static int
7358 ix86_fp_comparison_sahf_cost (code)
7359 enum rtx_code code;
7361 enum rtx_code bypass_code, first_code, second_code;
/* Return an arbitrarily high cost when the instruction is not preferred -
this keeps gcc from using it. */
7364 if (!TARGET_USE_SAHF && !optimize_size)
7365 return 1024;
7366 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7367 return (bypass_code != NIL || second_code != NIL) + 3;
7370 /* Compute cost of the comparison done using any method.
7371 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7372 static int
7373 ix86_fp_comparison_cost (code)
7374 enum rtx_code code;
7376 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7377 int min;
7379 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7380 sahf_cost = ix86_fp_comparison_sahf_cost (code);
7382 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7383 if (min > sahf_cost)
7384 min = sahf_cost;
7385 if (min > fcomi_cost)
7386 min = fcomi_cost;
7387 return min;
7390 /* Generate insn patterns to do a floating point compare of OPERANDS. */
7392 static rtx
7393 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
7394 enum rtx_code code;
7395 rtx op0, op1, scratch;
7396 rtx *second_test;
7397 rtx *bypass_test;
7399 enum machine_mode fpcmp_mode, intcmp_mode;
7400 rtx tmp, tmp2;
7401 int cost = ix86_fp_comparison_cost (code);
7402 enum rtx_code bypass_code, first_code, second_code;
7404 fpcmp_mode = ix86_fp_compare_mode (code);
7405 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7407 if (second_test)
7408 *second_test = NULL_RTX;
7409 if (bypass_test)
7410 *bypass_test = NULL_RTX;
7412 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7414 /* Do fcomi/sahf based test when profitable. */
7415 if ((bypass_code == NIL || bypass_test)
7416 && (second_code == NIL || second_test)
7417 && ix86_fp_comparison_arithmetics_cost (code) > cost)
7419 if (TARGET_CMOVE)
7421 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7422 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
7423 tmp);
7424 emit_insn (tmp);
7426 else
7428 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7429 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7430 if (!scratch)
7431 scratch = gen_reg_rtx (HImode);
7432 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7433 emit_insn (gen_x86_sahf_1 (scratch));
7436 /* The FP codes work out to act like unsigned. */
7437 intcmp_mode = fpcmp_mode;
7438 code = first_code;
7439 if (bypass_code != NIL)
7440 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7441 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7442 const0_rtx);
7443 if (second_code != NIL)
7444 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7445 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7446 const0_rtx);
7448 else
7450 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
7451 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7452 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7453 if (!scratch)
7454 scratch = gen_reg_rtx (HImode);
7455 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7457 /* In the unordered case, we have to check C2 for NaN's, which
7458 doesn't happen to work out to anything nice combination-wise.
7459 So do some bit twiddling on the value we've got in AH to come
7460 up with an appropriate set of condition codes. */
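/* For reference: after "fnstsw %ax" the relevant status-word bits
land in %ah as C0 = 0x01, C2 = 0x04 and C3 = 0x40, so the 0x45 masks
below test C3|C2|C0 at once; fcom sets C0 for "less than", C3 for
"equal", and all three for "unordered". */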
7462 intcmp_mode = CCNOmode;
7463 switch (code)
7465 case GT:
7466 case UNGT:
7467 if (code == GT || !TARGET_IEEE_FP)
7469 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7470 code = EQ;
7472 else
7474 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7475 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7476 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7477 intcmp_mode = CCmode;
7478 code = GEU;
7480 break;
7481 case LT:
7482 case UNLT:
7483 if (code == LT && TARGET_IEEE_FP)
7485 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7486 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
7487 intcmp_mode = CCmode;
7488 code = EQ;
7490 else
7492 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7493 code = NE;
7495 break;
7496 case GE:
7497 case UNGE:
7498 if (code == GE || !TARGET_IEEE_FP)
7500 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
7501 code = EQ;
7503 else
7505 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7506 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7507 GEN_INT (0x01)));
7508 code = NE;
7510 break;
7511 case LE:
7512 case UNLE:
7513 if (code == LE && TARGET_IEEE_FP)
7515 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7516 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7517 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7518 intcmp_mode = CCmode;
7519 code = LTU;
7521 else
7523 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7524 code = NE;
7526 break;
7527 case EQ:
7528 case UNEQ:
7529 if (code == EQ && TARGET_IEEE_FP)
7531 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7532 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7533 intcmp_mode = CCmode;
7534 code = EQ;
7536 else
7538 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7539 code = NE;
7540 break;
7542 break;
7543 case NE:
7544 case LTGT:
7545 if (code == NE && TARGET_IEEE_FP)
7547 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7548 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7549 GEN_INT (0x40)));
7550 code = NE;
7552 else
7554 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7555 code = EQ;
7557 break;
7559 case UNORDERED:
7560 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7561 code = NE;
7562 break;
7563 case ORDERED:
7564 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7565 code = EQ;
7566 break;
7568 default:
7569 abort ();
7573 /* Return the test that should be put into the flags user, i.e.
7574 the bcc, scc, or cmov instruction. */
7575 return gen_rtx_fmt_ee (code, VOIDmode,
7576 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7577 const0_rtx);
rtx
ix86_expand_compare (code, second_test, bypass_test)
7582 enum rtx_code code;
7583 rtx *second_test, *bypass_test;
7585 rtx op0, op1, ret;
7586 op0 = ix86_compare_op0;
7587 op1 = ix86_compare_op1;
7589 if (second_test)
7590 *second_test = NULL_RTX;
7591 if (bypass_test)
7592 *bypass_test = NULL_RTX;
7594 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7595 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
7596 second_test, bypass_test);
7597 else
7598 ret = ix86_expand_int_compare (code, op0, op1);
7600 return ret;
7603 /* Return true if the CODE will result in nontrivial jump sequence. */
7604 bool
7605 ix86_fp_jump_nontrivial_p (code)
7606 enum rtx_code code;
7608 enum rtx_code bypass_code, first_code, second_code;
7609 if (!TARGET_CMOVE)
7610 return true;
7611 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7612 return bypass_code != NIL || second_code != NIL;
7615 void
7616 ix86_expand_branch (code, label)
7617 enum rtx_code code;
7618 rtx label;
7620 rtx tmp;
7622 switch (GET_MODE (ix86_compare_op0))
7624 case QImode:
7625 case HImode:
7626 case SImode:
7627 simple:
7628 tmp = ix86_expand_compare (code, NULL, NULL);
7629 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7630 gen_rtx_LABEL_REF (VOIDmode, label),
7631 pc_rtx);
7632 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7633 return;
7635 case SFmode:
7636 case DFmode:
7637 case XFmode:
7638 case TFmode:
7640 rtvec vec;
7641 int use_fcomi;
7642 enum rtx_code bypass_code, first_code, second_code;
7644 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7645 &ix86_compare_op1);
7647 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Check whether we will use the natural sequence with one jump. If
so, we can expand the jump early. Otherwise delay expansion by
creating a compound insn so as not to confuse optimizers. */
7652 if (bypass_code == NIL && second_code == NIL
7653 && TARGET_CMOVE)
7655 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7656 gen_rtx_LABEL_REF (VOIDmode, label),
7657 pc_rtx, NULL_RTX);
7659 else
7661 tmp = gen_rtx_fmt_ee (code, VOIDmode,
7662 ix86_compare_op0, ix86_compare_op1);
7663 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7664 gen_rtx_LABEL_REF (VOIDmode, label),
7665 pc_rtx);
7666 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7668 use_fcomi = ix86_use_fcomi_compare (code);
7669 vec = rtvec_alloc (3 + !use_fcomi);
7670 RTVEC_ELT (vec, 0) = tmp;
7671 RTVEC_ELT (vec, 1)
7672 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7673 RTVEC_ELT (vec, 2)
7674 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7675 if (! use_fcomi)
7676 RTVEC_ELT (vec, 3)
7677 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7679 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7681 return;
7684 case DImode:
7685 if (TARGET_64BIT)
7686 goto simple;
7687 /* Expand DImode branch into multiple compare+branch. */
7689 rtx lo[2], hi[2], label2;
7690 enum rtx_code code1, code2, code3;
7692 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7694 tmp = ix86_compare_op0;
7695 ix86_compare_op0 = ix86_compare_op1;
7696 ix86_compare_op1 = tmp;
7697 code = swap_condition (code);
7699 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7700 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
7702 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7703 avoid two branches. This costs one extra insn, so disable when
7704 optimizing for size. */
7706 if ((code == EQ || code == NE)
7707 && (!optimize_size
7708 || hi[1] == const0_rtx || lo[1] == const0_rtx))
7710 rtx xor0, xor1;
7712 xor1 = hi[0];
7713 if (hi[1] != const0_rtx)
7714 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7715 NULL_RTX, 0, OPTAB_WIDEN);
7717 xor0 = lo[0];
7718 if (lo[1] != const0_rtx)
7719 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7720 NULL_RTX, 0, OPTAB_WIDEN);
7722 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7723 NULL_RTX, 0, OPTAB_WIDEN);
7725 ix86_compare_op0 = tmp;
7726 ix86_compare_op1 = const0_rtx;
7727 ix86_expand_branch (code, label);
7728 return;
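/* Example (for illustration only): testing a DImode value for
(in)equality with zero needs no xors at all -- it reduces to
branching on hi(a) | lo(a), which is why the optimize_size guard
above still allows the transformation when either constant word is
zero. */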
/* Otherwise, if we are doing a less-than or greater-or-equal-than
comparison, and op1 is a constant whose low word is zero, then we can
just examine the high word. */
7735 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7736 switch (code)
7738 case LT: case LTU: case GE: case GEU:
7739 ix86_compare_op0 = hi[0];
7740 ix86_compare_op1 = hi[1];
7741 ix86_expand_branch (code, label);
7742 return;
7743 default:
7744 break;
7747 /* Otherwise, we need two or three jumps. */
7749 label2 = gen_label_rtx ();
7751 code1 = code;
7752 code2 = swap_condition (code);
7753 code3 = unsigned_condition (code);
7755 switch (code)
7757 case LT: case GT: case LTU: case GTU:
7758 break;
7760 case LE: code1 = LT; code2 = GT; break;
7761 case GE: code1 = GT; code2 = LT; break;
7762 case LEU: code1 = LTU; code2 = GTU; break;
7763 case GEU: code1 = GTU; code2 = LTU; break;
7765 case EQ: code1 = NIL; code2 = NE; break;
7766 case NE: code2 = NIL; break;
7768 default:
7769 abort ();
7773 * a < b =>
7774 * if (hi(a) < hi(b)) goto true;
7775 * if (hi(a) > hi(b)) goto false;
7776 * if (lo(a) < lo(b)) goto true;
7777 * false:
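*
* Example (for illustration only): for a signed LE the switch above
* picks code1 = LT, code2 = GT, code3 = LEU, giving:
*   if (hi(a) < hi(b)) goto true;
*   if (hi(a) > hi(b)) goto false;
*   if (lo(a) <= lo(b)) goto true;   [unsigned compare]
*   false: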
7780 ix86_compare_op0 = hi[0];
7781 ix86_compare_op1 = hi[1];
7783 if (code1 != NIL)
7784 ix86_expand_branch (code1, label);
7785 if (code2 != NIL)
7786 ix86_expand_branch (code2, label2);
7788 ix86_compare_op0 = lo[0];
7789 ix86_compare_op1 = lo[1];
7790 ix86_expand_branch (code3, label);
7792 if (code2 != NIL)
7793 emit_label (label2);
7794 return;
7797 default:
7798 abort ();
7802 /* Split branch based on floating point condition. */
7803 void
7804 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7805 enum rtx_code code;
7806 rtx op1, op2, target1, target2, tmp;
7808 rtx second, bypass;
7809 rtx label = NULL_RTX;
7810 rtx condition;
7811 int bypass_probability = -1, second_probability = -1, probability = -1;
7812 rtx i;
7814 if (target2 != pc_rtx)
7816 rtx tmp = target2;
7817 code = reverse_condition_maybe_unordered (code);
7818 target2 = target1;
7819 target1 = tmp;
7822 condition = ix86_expand_fp_compare (code, op1, op2,
7823 tmp, &second, &bypass);
7825 if (split_branch_probability >= 0)
/* Distribute the probabilities across the jumps.
Assume that BYPASS and SECOND are always tests
for UNORDERED. */
7830 probability = split_branch_probability;
/* A value of 1 is low enough that the probability need not be
updated. Later we may run some experiments and see
if unordered values are more frequent in practice. */
7835 if (bypass)
7836 bypass_probability = 1;
7837 if (second)
7838 second_probability = 1;
7840 if (bypass != NULL_RTX)
7842 label = gen_label_rtx ();
7843 i = emit_jump_insn (gen_rtx_SET
7844 (VOIDmode, pc_rtx,
7845 gen_rtx_IF_THEN_ELSE (VOIDmode,
7846 bypass,
7847 gen_rtx_LABEL_REF (VOIDmode,
7848 label),
7849 pc_rtx)));
7850 if (bypass_probability >= 0)
7851 REG_NOTES (i)
7852 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7853 GEN_INT (bypass_probability),
7854 REG_NOTES (i));
7856 i = emit_jump_insn (gen_rtx_SET
7857 (VOIDmode, pc_rtx,
7858 gen_rtx_IF_THEN_ELSE (VOIDmode,
7859 condition, target1, target2)));
7860 if (probability >= 0)
7861 REG_NOTES (i)
7862 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7863 GEN_INT (probability),
7864 REG_NOTES (i));
7865 if (second != NULL_RTX)
7867 i = emit_jump_insn (gen_rtx_SET
7868 (VOIDmode, pc_rtx,
7869 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7870 target2)));
7871 if (second_probability >= 0)
7872 REG_NOTES (i)
7873 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7874 GEN_INT (second_probability),
7875 REG_NOTES (i));
7877 if (label != NULL_RTX)
7878 emit_label (label);
int
ix86_expand_setcc (code, dest)
7883 enum rtx_code code;
7884 rtx dest;
7886 rtx ret, tmp, tmpreg;
7887 rtx second_test, bypass_test;
7889 if (GET_MODE (ix86_compare_op0) == DImode
7890 && !TARGET_64BIT)
7891 return 0; /* FAIL */
7893 if (GET_MODE (dest) != QImode)
7894 abort ();
7896 ret = ix86_expand_compare (code, &second_test, &bypass_test);
7897 PUT_MODE (ret, QImode);
7899 tmp = dest;
7900 tmpreg = dest;
7902 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
7903 if (bypass_test || second_test)
7905 rtx test = second_test;
7906 int bypass = 0;
7907 rtx tmp2 = gen_reg_rtx (QImode);
7908 if (bypass_test)
7910 if (second_test)
7911 abort();
7912 test = bypass_test;
7913 bypass = 1;
7914 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
7916 PUT_MODE (test, QImode);
7917 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
7919 if (bypass)
7920 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
7921 else
7922 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
7925 return 1; /* DONE */
int
ix86_expand_int_movcc (operands)
7930 rtx operands[];
7932 enum rtx_code code = GET_CODE (operands[1]), compare_code;
7933 rtx compare_seq, compare_op;
7934 rtx second_test, bypass_test;
7935 enum machine_mode mode = GET_MODE (operands[0]);
/* When the compare code is not LTU or GEU, we can not use the sbbl case.
In case the comparison is done with an immediate, we can convert it to LTU
or GEU by altering the integer. */
7941 if ((code == LEU || code == GTU)
7942 && GET_CODE (ix86_compare_op1) == CONST_INT
7943 && mode != HImode
7944 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
7945 && GET_CODE (operands[2]) == CONST_INT
7946 && GET_CODE (operands[3]) == CONST_INT)
7948 if (code == LEU)
7949 code = LTU;
7950 else
7951 code = GEU;
7952 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
7955 start_sequence ();
7956 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
7957 compare_seq = gen_sequence ();
7958 end_sequence ();
7960 compare_code = GET_CODE (compare_op);
7962 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
7963 HImode insns, we'd be swallowed in word prefix ops. */
7965 if (mode != HImode
7966 && (mode != DImode || TARGET_64BIT)
7967 && GET_CODE (operands[2]) == CONST_INT
7968 && GET_CODE (operands[3]) == CONST_INT)
7970 rtx out = operands[0];
7971 HOST_WIDE_INT ct = INTVAL (operands[2]);
7972 HOST_WIDE_INT cf = INTVAL (operands[3]);
7973 HOST_WIDE_INT diff;
7975 if ((compare_code == LTU || compare_code == GEU)
7976 && !second_test && !bypass_test)
7979 /* Detect overlap between destination and compare sources. */
7980 rtx tmp = out;
7982 /* To simplify rest of code, restrict to the GEU case. */
7983 if (compare_code == LTU)
7985 int tmp = ct;
7986 ct = cf;
7987 cf = tmp;
7988 compare_code = reverse_condition (compare_code);
7989 code = reverse_condition (code);
7991 diff = ct - cf;
7993 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
7994 || reg_overlap_mentioned_p (out, ix86_compare_op1))
7995 tmp = gen_reg_rtx (mode);
7997 emit_insn (compare_seq);
7998 if (mode == DImode)
7999 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8000 else
8001 emit_insn (gen_x86_movsicc_0_m1 (tmp));
8003 if (diff == 1)
8006 * cmpl op0,op1
8007 * sbbl dest,dest
8008 * [addl dest, ct]
8010 * Size 5 - 8.
8012 if (ct)
8013 tmp = expand_simple_binop (mode, PLUS,
8014 tmp, GEN_INT (ct),
8015 tmp, 1, OPTAB_DIRECT);
8017 else if (cf == -1)
8020 * cmpl op0,op1
8021 * sbbl dest,dest
8022 * orl $ct, dest
8024 * Size 8.
8026 tmp = expand_simple_binop (mode, IOR,
8027 tmp, GEN_INT (ct),
8028 tmp, 1, OPTAB_DIRECT);
8030 else if (diff == -1 && ct)
8033 * cmpl op0,op1
8034 * sbbl dest,dest
8035 * xorl $-1, dest
8036 * [addl dest, cf]
8038 * Size 8 - 11.
8040 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8041 if (cf)
8042 tmp = expand_simple_binop (mode, PLUS,
8043 tmp, GEN_INT (cf),
8044 tmp, 1, OPTAB_DIRECT);
8046 else
8049 * cmpl op0,op1
8050 * sbbl dest,dest
8051 * andl cf - ct, dest
8052 * [addl dest, ct]
8054 * Size 8 - 11.
8056 tmp = expand_simple_binop (mode, AND,
8057 tmp,
8058 GEN_INT (trunc_int_for_mode
8059 (cf - ct, mode)),
8060 tmp, 1, OPTAB_DIRECT);
8061 if (ct)
8062 tmp = expand_simple_binop (mode, PLUS,
8063 tmp, GEN_INT (ct),
8064 tmp, 1, OPTAB_DIRECT);
8067 if (tmp != out)
8068 emit_move_insn (out, tmp);
8070 return 1; /* DONE */
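/* Example (for illustration only): for unsigned operands,
"x >= y ? 6 : 5" has diff == 1 and becomes

cmpl y, x
sbbl dest, dest   ; dest = (x >= y) ? 0 : -1
addl $6, dest     ; 6 when true, 5 when false

while the general case uses "andl $(cf - ct)" before the final add. */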
8073 diff = ct - cf;
8074 if (diff < 0)
8076 HOST_WIDE_INT tmp;
8077 tmp = ct, ct = cf, cf = tmp;
8078 diff = -diff;
8079 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8081 /* We may be reversing an unordered compare to a normal compare, which
8082 is not valid in general (we may convert a non-trapping condition
8083 into a trapping one), but on i386 we currently emit all
8084 comparisons unordered. */
8085 compare_code = reverse_condition_maybe_unordered (compare_code);
8086 code = reverse_condition_maybe_unordered (code);
8088 else
8090 compare_code = reverse_condition (compare_code);
8091 code = reverse_condition (code);
8094 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8095 || diff == 3 || diff == 5 || diff == 9)
8096 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8099 * xorl dest,dest
8100 * cmpl op1,op2
8101 * setcc dest
8102 * lea cf(dest*(ct-cf)),dest
8104 * Size 14.
8106 * This also catches the degenerate setcc-only case.
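/* The allowed values of diff are exactly those lea can form in one
   instruction: a scale of 1, 2, 4 or 8, optionally plus the base, so
   diff == 3 becomes dest + dest*2 and diff == 9 becomes dest + dest*8.  */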
8109 rtx tmp;
8110 int nops;
8112 out = emit_store_flag (out, code, ix86_compare_op0,
8113 ix86_compare_op1, VOIDmode, 0, 1);
8115 nops = 0;
8116 /* On x86_64 the lea instruction operates on Pmode, so we need to do
8117 the arithmetic in the proper mode to match. */
8118 if (diff == 1)
8119 tmp = out;
8120 else
8122 rtx out1;
8123 out1 = out;
8124 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8125 nops++;
8126 if (diff & 1)
8128 tmp = gen_rtx_PLUS (mode, tmp, out1);
8129 nops++;
8132 if (cf != 0)
8134 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8135 nops++;
8137 if (tmp != out
8138 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8140 if (nops == 1)
8142 rtx clob;
8144 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8145 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8147 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8148 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8149 emit_insn (tmp);
8151 else
8152 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8154 if (out != operands[0])
8155 emit_move_insn (operands[0], out);
8157 return 1; /* DONE */
8161 * General case: Jumpful:
8162 * xorl dest,dest cmpl op1, op2
8163 * cmpl op1, op2 movl ct, dest
8164 * setcc dest jcc 1f
8165 * decl dest movl cf, dest
8166 * andl (cf-ct),dest 1:
8167 * addl ct,dest
8169 * Size 20. Size 14.
8171 * This is reasonably steep, but branch mispredict costs are
8172 * high on modern cpus, so consider failing only if optimizing
8173 * for space.
8175 * %%% Parameterize branch_cost on the tuning architecture, then
8176 * use that. The 80386 couldn't care less about mispredicts.
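/* Worked example for the jumpless sequence: setcc leaves dest = 1 when
   the condition holds and 0 otherwise; decl turns that into 0 / -1,
   andl (cf-ct) into 0 / cf-ct, and addl ct into ct / cf.  */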
8179 if (!optimize_size && !TARGET_CMOVE)
8181 if (ct == 0)
8183 ct = cf;
8184 cf = 0;
8185 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8187 /* We may be reversing an unordered compare to a normal compare,
8188 which is not valid in general (we may convert a non-trapping
8189 condition into a trapping one), but on i386 we currently
8190 emit all comparisons unordered. */
8191 compare_code = reverse_condition_maybe_unordered (compare_code);
8192 code = reverse_condition_maybe_unordered (code);
8194 else
8196 compare_code = reverse_condition (compare_code);
8197 code = reverse_condition (code);
8201 out = emit_store_flag (out, code, ix86_compare_op0,
8202 ix86_compare_op1, VOIDmode, 0, 1);
8204 out = expand_simple_binop (mode, PLUS,
8205 out, constm1_rtx,
8206 out, 1, OPTAB_DIRECT);
8207 out = expand_simple_binop (mode, AND,
8208 out,
8209 GEN_INT (trunc_int_for_mode
8210 (cf - ct, mode)),
8211 out, 1, OPTAB_DIRECT);
8212 out = expand_simple_binop (mode, PLUS,
8213 out, GEN_INT (ct),
8214 out, 1, OPTAB_DIRECT);
8215 if (out != operands[0])
8216 emit_move_insn (operands[0], out);
8218 return 1; /* DONE */
8222 if (!TARGET_CMOVE)
8224 /* Try a few things more with specific constants and a variable. */
8226 optab op;
8227 rtx var, orig_out, out, tmp;
8229 if (optimize_size)
8230 return 0; /* FAIL */
8232 /* If one of the two operands is an interesting constant, load a
8233 constant with the above and mask it in with a logical operation. */
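/* For example, "dest = cond ? 0 : v": the recursive call below loads
   mask = cond ? 0 : -1 into a temporary, and the AND at the end turns
   that into 0 or v.  */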
8235 if (GET_CODE (operands[2]) == CONST_INT)
8237 var = operands[3];
8238 if (INTVAL (operands[2]) == 0)
8239 operands[3] = constm1_rtx, op = and_optab;
8240 else if (INTVAL (operands[2]) == -1)
8241 operands[3] = const0_rtx, op = ior_optab;
8242 else
8243 return 0; /* FAIL */
8245 else if (GET_CODE (operands[3]) == CONST_INT)
8247 var = operands[2];
8248 if (INTVAL (operands[3]) == 0)
8249 operands[2] = constm1_rtx, op = and_optab;
8250 else if (INTVAL (operands[3]) == -1)
8251 operands[2] = const0_rtx, op = ior_optab;
8252 else
8253 return 0; /* FAIL */
8255 else
8256 return 0; /* FAIL */
8258 orig_out = operands[0];
8259 tmp = gen_reg_rtx (mode);
8260 operands[0] = tmp;
8262 /* Recurse to get the constant loaded. */
8263 if (ix86_expand_int_movcc (operands) == 0)
8264 return 0; /* FAIL */
8266 /* Mask in the interesting variable. */
8267 out = expand_binop (mode, op, var, tmp, orig_out, 0,
8268 OPTAB_WIDEN);
8269 if (out != orig_out)
8270 emit_move_insn (orig_out, out);
8272 return 1; /* DONE */
8276 * For comparison with above,
8278 * movl cf,dest
8279 * movl ct,tmp
8280 * cmpl op1,op2
8281 * cmovcc tmp,dest
8283 * Size 15.
8286 if (! nonimmediate_operand (operands[2], mode))
8287 operands[2] = force_reg (mode, operands[2]);
8288 if (! nonimmediate_operand (operands[3], mode))
8289 operands[3] = force_reg (mode, operands[3]);
8291 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8293 rtx tmp = gen_reg_rtx (mode);
8294 emit_move_insn (tmp, operands[3]);
8295 operands[3] = tmp;
8297 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8299 rtx tmp = gen_reg_rtx (mode);
8300 emit_move_insn (tmp, operands[2]);
8301 operands[2] = tmp;
8303 if (! register_operand (operands[2], VOIDmode)
8304 && ! register_operand (operands[3], VOIDmode))
8305 operands[2] = force_reg (mode, operands[2]);
8307 emit_insn (compare_seq);
8308 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8309 gen_rtx_IF_THEN_ELSE (mode,
8310 compare_op, operands[2],
8311 operands[3])));
8312 if (bypass_test)
8313 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8314 gen_rtx_IF_THEN_ELSE (mode,
8315 bypass_test,
8316 operands[3],
8317 operands[0])));
8318 if (second_test)
8319 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8320 gen_rtx_IF_THEN_ELSE (mode,
8321 second_test,
8322 operands[2],
8323 operands[0])));
8325 return 1; /* DONE */
8329 ix86_expand_fp_movcc (operands)
8330 rtx operands[];
8332 enum rtx_code code;
8333 rtx tmp;
8334 rtx compare_op, second_test, bypass_test;
8336 /* For SF/DFmode conditional moves based on comparisons
8337 in the same mode, we may want to use SSE min/max instructions. */
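/* (For example, "a = b < c ? b : c" in SFmode can become a single
   minss when the operands line up; the checks below look for exactly
   that cross-match.)  */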
8338 if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
8339 || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
8340 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
8341 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
8342 && (!TARGET_IEEE_FP
8343 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
8344 /* We may be called from the post-reload splitter. */
8345 && (!REG_P (operands[0])
8346 || SSE_REG_P (operands[0])
8347 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
8349 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
8350 code = GET_CODE (operands[1]);
8352 /* See if we have a (cross) match between the comparison operands and
8353 the conditional move operands. */
8354 if (rtx_equal_p (operands[2], op1))
8356 rtx tmp = op0;
8357 op0 = op1;
8358 op1 = tmp;
8359 code = reverse_condition_maybe_unordered (code);
8361 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
8363 /* Check for min operation. */
8364 if (code == LT)
8366 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8367 if (memory_operand (op0, VOIDmode))
8368 op0 = force_reg (GET_MODE (operands[0]), op0);
8369 if (GET_MODE (operands[0]) == SFmode)
8370 emit_insn (gen_minsf3 (operands[0], op0, op1));
8371 else
8372 emit_insn (gen_mindf3 (operands[0], op0, op1));
8373 return 1;
8375 /* Check for max operation. */
8376 if (code == GT)
8378 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8379 if (memory_operand (op0, VOIDmode))
8380 op0 = force_reg (GET_MODE (operands[0]), op0);
8381 if (GET_MODE (operands[0]) == SFmode)
8382 emit_insn (gen_maxsf3 (operands[0], op0, op1));
8383 else
8384 emit_insn (gen_maxdf3 (operands[0], op0, op1));
8385 return 1;
8388 /* Arrange for the condition to be an sse_comparison_operator. In
8389 non-IEEE mode, try to canonicalize the destination operand
8390 to be first in the comparison; this helps reload avoid extra
8391 moves. */
8392 if (!sse_comparison_operator (operands[1], VOIDmode)
8393 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
8395 rtx tmp = ix86_compare_op0;
8396 ix86_compare_op0 = ix86_compare_op1;
8397 ix86_compare_op1 = tmp;
8398 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
8399 VOIDmode, ix86_compare_op0,
8400 ix86_compare_op1);
8402 /* Similarly, try to arrange for the result to be the first operand of
8403 the conditional move. We also don't support the NE comparison on
8404 SSE, so try to avoid it. */
8405 if ((rtx_equal_p (operands[0], operands[3])
8406 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
8407 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
8409 rtx tmp = operands[2];
8410 operands[2] = operands[3];
8411 operands[3] = tmp;
8412 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8413 (GET_CODE (operands[1])),
8414 VOIDmode, ix86_compare_op0,
8415 ix86_compare_op1);
8417 if (GET_MODE (operands[0]) == SFmode)
8418 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
8419 operands[2], operands[3],
8420 ix86_compare_op0, ix86_compare_op1));
8421 else
8422 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
8423 operands[2], operands[3],
8424 ix86_compare_op0, ix86_compare_op1));
8425 return 1;
8428 /* The floating point conditional move instructions don't directly
8429 support conditions resulting from a signed integer comparison. */
8431 code = GET_CODE (operands[1]);
8432 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8437 if (!fcmov_comparison_operator (compare_op, VOIDmode))
8439 if (second_test != NULL || bypass_test != NULL)
8440 abort ();
8441 tmp = gen_reg_rtx (QImode);
8442 ix86_expand_setcc (code, tmp);
8443 code = NE;
8444 ix86_compare_op0 = tmp;
8445 ix86_compare_op1 = const0_rtx;
8446 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8448 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8450 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8451 emit_move_insn (tmp, operands[3]);
8452 operands[3] = tmp;
8454 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8456 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8457 emit_move_insn (tmp, operands[2]);
8458 operands[2] = tmp;
8461 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8462 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8463 compare_op,
8464 operands[2],
8465 operands[3])));
8466 if (bypass_test)
8467 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8468 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8469 bypass_test,
8470 operands[3],
8471 operands[0])));
8472 if (second_test)
8473 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8474 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8475 second_test,
8476 operands[2],
8477 operands[0])));
8479 return 1;
8482 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
8483 works for floating point parameters and non-offsettable memories.
8484 For pushes, it returns just stack offsets; the values will be saved
8485 in the right order. At most three parts are generated. */
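/* (On ia32, for instance, a DFmode operand splits into two SImode parts
   and an XFmode or TFmode operand into three; on x86-64 an XFmode or
   TFmode operand splits into a DImode part plus an SImode part.)  */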
8487 static int
8488 ix86_split_to_parts (operand, parts, mode)
8489 rtx operand;
8490 rtx *parts;
8491 enum machine_mode mode;
8493 int size;
8495 if (!TARGET_64BIT)
8496 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8497 else
8498 size = (GET_MODE_SIZE (mode) + 4) / 8;
8500 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8501 abort ();
8502 if (size < 2 || size > 3)
8503 abort ();
8505 /* Optimize constant pool references into immediates. This is used by
8506 fp moves, which force all constants to memory to allow combining. */
8508 if (GET_CODE (operand) == MEM
8509 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8510 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8511 operand = get_pool_constant (XEXP (operand, 0));
8513 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
8515 /* The only non-offsettable memories we handle are pushes. */
8516 if (! push_operand (operand, VOIDmode))
8517 abort ();
8519 operand = copy_rtx (operand);
8520 PUT_MODE (operand, Pmode);
8521 parts[0] = parts[1] = parts[2] = operand;
8523 else if (!TARGET_64BIT)
8525 if (mode == DImode)
8526 split_di (&operand, 1, &parts[0], &parts[1]);
8527 else
8529 if (REG_P (operand))
8531 if (!reload_completed)
8532 abort ();
8533 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8534 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8535 if (size == 3)
8536 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8538 else if (offsettable_memref_p (operand))
8540 operand = adjust_address (operand, SImode, 0);
8541 parts[0] = operand;
8542 parts[1] = adjust_address (operand, SImode, 4);
8543 if (size == 3)
8544 parts[2] = adjust_address (operand, SImode, 8);
8546 else if (GET_CODE (operand) == CONST_DOUBLE)
8548 REAL_VALUE_TYPE r;
8549 long l[4];
8551 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8552 switch (mode)
8554 case XFmode:
8555 case TFmode:
8556 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8557 parts[2] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8558 break;
8559 case DFmode:
8560 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8561 break;
8562 default:
8563 abort ();
8565 parts[1] = GEN_INT (trunc_int_for_mode (l[1], SImode));
8566 parts[0] = GEN_INT (trunc_int_for_mode (l[0], SImode));
8568 else
8569 abort ();
8572 else
8574 if (mode == TImode)
8575 split_ti (&operand, 1, &parts[0], &parts[1]);
8576 if (mode == XFmode || mode == TFmode)
8578 if (REG_P (operand))
8580 if (!reload_completed)
8581 abort ();
8582 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8583 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8585 else if (offsettable_memref_p (operand))
8587 operand = adjust_address (operand, DImode, 0);
8588 parts[0] = operand;
8589 parts[1] = adjust_address (operand, SImode, 8);
8591 else if (GET_CODE (operand) == CONST_DOUBLE)
8593 REAL_VALUE_TYPE r;
8594 long l[3];
8596 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8597 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8598 /* Do not use a shift by 32, to avoid a warning on 32-bit systems. */
8599 if (HOST_BITS_PER_WIDE_INT >= 64)
8600 parts[0]
8601 = GEN_INT (trunc_int_for_mode
8602 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
8603 + ((((HOST_WIDE_INT)l[1]) << 31) << 1),
8604 DImode));
8605 else
8606 parts[0] = immed_double_const (l[0], l[1], DImode);
8607 parts[1] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8609 else
8610 abort ();
8614 return size;
8617 /* Emit insns to perform a move or push of DI, DF, and XF values.
8618 Operands 2-4 are filled with the destination parts in the correct
8619 order; operands 5-7 with the corresponding source parts. */
8622 void
8623 ix86_split_long_move (operands)
8624 rtx operands[];
8626 rtx part[2][3];
8627 int nparts;
8628 int push = 0;
8629 int collisions = 0;
8630 enum machine_mode mode = GET_MODE (operands[0]);
8632 /* The DFmode expanders may ask us to move a double.
8633 For a 64-bit target this is a single move. By hiding that
8634 fact here we simplify the i386.md splitters. */
8635 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8637 /* Optimize constant pool references into immediates. This is used by
8638 fp moves, which force all constants to memory to allow combining. */
8640 if (GET_CODE (operands[1]) == MEM
8641 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8642 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8643 operands[1] = get_pool_constant (XEXP (operands[1], 0));
8644 if (push_operand (operands[0], VOIDmode))
8646 operands[0] = copy_rtx (operands[0]);
8647 PUT_MODE (operands[0], Pmode);
8649 else
8650 operands[0] = gen_lowpart (DImode, operands[0]);
8651 operands[1] = gen_lowpart (DImode, operands[1]);
8652 emit_move_insn (operands[0], operands[1]);
8653 return;
8656 /* The only non-offsettable memory we handle is push. */
8657 if (push_operand (operands[0], VOIDmode))
8658 push = 1;
8659 else if (GET_CODE (operands[0]) == MEM
8660 && ! offsettable_memref_p (operands[0]))
8661 abort ();
8663 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8664 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
8666 /* When emitting a push, take care with source operands on the stack. */
8667 if (push && GET_CODE (operands[1]) == MEM
8668 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
8670 if (nparts == 3)
8671 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8672 XEXP (part[1][2], 0));
8673 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8674 XEXP (part[1][1], 0));
8677 /* We need to do the copy in the right order in case an address
8678 register of the source overlaps the destination. */
8679 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
8681 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
8682 collisions++;
8683 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8684 collisions++;
8685 if (nparts == 3
8686 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
8687 collisions++;
8689 /* A collision in the middle part can be handled by reordering. */
8690 if (collisions == 1 && nparts == 3
8691 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8693 rtx tmp;
8694 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
8695 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
8698 /* If there are more collisions, we can't handle them by reordering.
8699 Do an lea to the last part and use only one colliding move. */
8700 else if (collisions > 1)
8702 collisions = 1;
8703 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
8704 XEXP (part[1][0], 0)));
8705 part[1][0] = change_address (part[1][0],
8706 TARGET_64BIT ? DImode : SImode,
8707 part[0][nparts - 1]);
8708 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
8709 if (nparts == 3)
8710 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
8714 if (push)
8716 if (!TARGET_64BIT)
8718 if (nparts == 3)
8720 /* We use only the first 12 bytes of the TFmode value, but for
8721 pushing we are required to adjust the stack as if we were
8722 pushing a real 16-byte value. */
8723 if (mode == TFmode && !TARGET_64BIT)
8724 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
8725 GEN_INT (-4)));
8726 emit_move_insn (part[0][2], part[1][2]);
8729 else
8731 /* In 64-bit mode there is no 32-bit push available. If the operand is
8732 a register, that is OK; we just use the larger counterpart. We also
8733 retype memory references; these come from an attempt to avoid the
8734 REX prefix when moving the second half of a TFmode value. */
8735 if (GET_MODE (part[1][1]) == SImode)
8737 if (GET_CODE (part[1][1]) == MEM)
8738 part[1][1] = adjust_address (part[1][1], DImode, 0);
8739 else if (REG_P (part[1][1]))
8740 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
8741 else
8742 abort ();
8743 if (GET_MODE (part[1][0]) == SImode)
8744 part[1][0] = part[1][1];
8747 emit_move_insn (part[0][1], part[1][1]);
8748 emit_move_insn (part[0][0], part[1][0]);
8749 return;
8752 /* Choose the correct order so we do not overwrite the source before it is copied. */
8753 if ((REG_P (part[0][0])
8754 && REG_P (part[1][1])
8755 && (REGNO (part[0][0]) == REGNO (part[1][1])
8756 || (nparts == 3
8757 && REGNO (part[0][0]) == REGNO (part[1][2]))))
8758 || (collisions > 0
8759 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
8761 if (nparts == 3)
8763 operands[2] = part[0][2];
8764 operands[3] = part[0][1];
8765 operands[4] = part[0][0];
8766 operands[5] = part[1][2];
8767 operands[6] = part[1][1];
8768 operands[7] = part[1][0];
8770 else
8772 operands[2] = part[0][1];
8773 operands[3] = part[0][0];
8774 operands[5] = part[1][1];
8775 operands[6] = part[1][0];
8778 else
8780 if (nparts == 3)
8782 operands[2] = part[0][0];
8783 operands[3] = part[0][1];
8784 operands[4] = part[0][2];
8785 operands[5] = part[1][0];
8786 operands[6] = part[1][1];
8787 operands[7] = part[1][2];
8789 else
8791 operands[2] = part[0][0];
8792 operands[3] = part[0][1];
8793 operands[5] = part[1][0];
8794 operands[6] = part[1][1];
8797 emit_move_insn (operands[2], operands[5]);
8798 emit_move_insn (operands[3], operands[6]);
8799 if (nparts == 3)
8800 emit_move_insn (operands[4], operands[7]);
8802 return;
8805 void
8806 ix86_split_ashldi (operands, scratch)
8807 rtx *operands, scratch;
8809 rtx low[2], high[2];
8810 int count;
8812 if (GET_CODE (operands[2]) == CONST_INT)
8814 split_di (operands, 2, low, high);
8815 count = INTVAL (operands[2]) & 63;
8817 if (count >= 32)
8819 emit_move_insn (high[0], low[1]);
8820 emit_move_insn (low[0], const0_rtx);
8822 if (count > 32)
8823 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
8825 else
8827 if (!rtx_equal_p (operands[0], operands[1]))
8828 emit_move_insn (operands[0], operands[1]);
8829 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
8830 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
8833 else
8835 if (!rtx_equal_p (operands[0], operands[1]))
8836 emit_move_insn (operands[0], operands[1]);
8838 split_di (operands, 1, low, high);
8840 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
8841 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
8843 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8845 if (! no_new_pseudos)
8846 scratch = force_reg (SImode, const0_rtx);
8847 else
8848 emit_move_insn (scratch, const0_rtx);
8850 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
8851 scratch));
8853 else
8854 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
8858 void
8859 ix86_split_ashrdi (operands, scratch)
8860 rtx *operands, scratch;
8862 rtx low[2], high[2];
8863 int count;
8865 if (GET_CODE (operands[2]) == CONST_INT)
8867 split_di (operands, 2, low, high);
8868 count = INTVAL (operands[2]) & 63;
8870 if (count >= 32)
8872 emit_move_insn (low[0], high[1]);
8874 if (! reload_completed)
8875 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
8876 else
8878 emit_move_insn (high[0], low[0]);
8879 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
8882 if (count > 32)
8883 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
8885 else
8887 if (!rtx_equal_p (operands[0], operands[1]))
8888 emit_move_insn (operands[0], operands[1]);
8889 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8890 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
8893 else
8895 if (!rtx_equal_p (operands[0], operands[1]))
8896 emit_move_insn (operands[0], operands[1]);
8898 split_di (operands, 1, low, high);
8900 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8901 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
8903 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8905 if (! no_new_pseudos)
8906 scratch = gen_reg_rtx (SImode);
8907 emit_move_insn (scratch, high[0]);
8908 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
8909 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8910 scratch));
8912 else
8913 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
8917 void
8918 ix86_split_lshrdi (operands, scratch)
8919 rtx *operands, scratch;
8921 rtx low[2], high[2];
8922 int count;
8924 if (GET_CODE (operands[2]) == CONST_INT)
8926 split_di (operands, 2, low, high);
8927 count = INTVAL (operands[2]) & 63;
8929 if (count >= 32)
8931 emit_move_insn (low[0], high[1]);
8932 emit_move_insn (high[0], const0_rtx);
8934 if (count > 32)
8935 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
8937 else
8939 if (!rtx_equal_p (operands[0], operands[1]))
8940 emit_move_insn (operands[0], operands[1]);
8941 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8942 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
8945 else
8947 if (!rtx_equal_p (operands[0], operands[1]))
8948 emit_move_insn (operands[0], operands[1]);
8950 split_di (operands, 1, low, high);
8952 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8953 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
8955 /* Heh. By reversing the arguments, we can reuse this pattern. */
8956 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8958 if (! no_new_pseudos)
8959 scratch = force_reg (SImode, const0_rtx);
8960 else
8961 emit_move_insn (scratch, const0_rtx);
8963 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8964 scratch));
8966 else
8967 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
8971 /* Helper function for the string operations below. Test whether
8972 VARIABLE is aligned to VALUE bytes. If so, jump to the returned label. */
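/* (E.g. ix86_expand_aligntest (destreg, 1) emits a test of the low bit
   and a jump to the returned label taken when the bit is clear, so the
   fall-through path handles the odd byte.)  */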
8973 static rtx
8974 ix86_expand_aligntest (variable, value)
8975 rtx variable;
8976 int value;
8978 rtx label = gen_label_rtx ();
8979 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
8980 if (GET_MODE (variable) == DImode)
8981 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
8982 else
8983 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
8984 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
8985 1, label);
8986 return label;
8989 /* Adjust COUNTREG by VALUE. */
8990 static void
8991 ix86_adjust_counter (countreg, value)
8992 rtx countreg;
8993 HOST_WIDE_INT value;
8995 if (GET_MODE (countreg) == DImode)
8996 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
8997 else
8998 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9001 /* Zero extend the possibly-SImode EXP to a Pmode register. */
9003 ix86_zero_extend_to_Pmode (exp)
9004 rtx exp;
9006 rtx r;
9007 if (GET_MODE (exp) == VOIDmode)
9008 return force_reg (Pmode, exp);
9009 if (GET_MODE (exp) == Pmode)
9010 return copy_to_mode_reg (Pmode, exp);
9011 r = gen_reg_rtx (Pmode);
9012 emit_insn (gen_zero_extendsidi2 (r, exp));
9013 return r;
9016 /* Expand string move (memcpy) operation. Use i386 string operations when
9017 profitable. expand_clrstr contains similar code. */
9019 ix86_expand_movstr (dst, src, count_exp, align_exp)
9020 rtx dst, src, count_exp, align_exp;
9022 rtx srcreg, destreg, countreg;
9023 enum machine_mode counter_mode;
9024 HOST_WIDE_INT align = 0;
9025 unsigned HOST_WIDE_INT count = 0;
9026 rtx insns;
9028 start_sequence ();
9030 if (GET_CODE (align_exp) == CONST_INT)
9031 align = INTVAL (align_exp);
9033 /* This simple hack avoids all inlining code and simplifies code below. */
9034 if (!TARGET_ALIGN_STRINGOPS)
9035 align = 64;
9037 if (GET_CODE (count_exp) == CONST_INT)
9038 count = INTVAL (count_exp);
9040 /* Figure out the proper mode for the counter. For 32 bits it is
9041 always SImode; for 64 bits use SImode when possible, otherwise DImode.
9042 Set COUNT to the number of bytes copied when known at compile time. */
9043 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9044 || x86_64_zero_extended_value (count_exp))
9045 counter_mode = SImode;
9046 else
9047 counter_mode = DImode;
9049 if (counter_mode != SImode && counter_mode != DImode)
9050 abort ();
9052 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9053 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9055 emit_insn (gen_cld ());
9057 /* When optimizing for size, emit a simple rep ; movsb instruction for
9058 counts not divisible by 4. */
9060 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9062 countreg = ix86_zero_extend_to_Pmode (count_exp);
9063 if (TARGET_64BIT)
9064 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9065 destreg, srcreg, countreg));
9066 else
9067 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9068 destreg, srcreg, countreg));
9071 /* For constant aligned (or small unaligned) copies use rep movsl
9072 followed by code copying the rest. For PentiumPro, ensure 8-byte
9073 alignment to allow rep movsl acceleration. */
9075 else if (count != 0
9076 && (align >= 8
9077 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9078 || optimize_size || count < (unsigned int)64))
9080 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9081 if (count & ~(size - 1))
9083 countreg = copy_to_mode_reg (counter_mode,
9084 GEN_INT ((count >> (size == 4 ? 2 : 3))
9085 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9086 countreg = ix86_zero_extend_to_Pmode (countreg);
9087 if (size == 4)
9089 if (TARGET_64BIT)
9090 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9091 destreg, srcreg, countreg));
9092 else
9093 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9094 destreg, srcreg, countreg));
9096 else
9097 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9098 destreg, srcreg, countreg));
9100 if (size == 8 && (count & 0x04))
9101 emit_insn (gen_strmovsi (destreg, srcreg));
9102 if (count & 0x02)
9103 emit_insn (gen_strmovhi (destreg, srcreg));
9104 if (count & 0x01)
9105 emit_insn (gen_strmovqi (destreg, srcreg));
9107 /* The generic code, based on the glibc implementation:
9108 - align the destination to 4 bytes (8-byte alignment is used for
9109 PentiumPro, allowing accelerated copying there)
9110 - copy the data using rep movsl
9111 - copy the rest; a worked example follows. */
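/* For instance, a 1003-byte copy with unknown alignment: the aligning
   stores bring the destination to a 4-byte boundary while the counter
   is adjusted, rep movsl then copies four bytes at a time, and the
   tail code finishes the remaining count & 3 bytes.  */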
9112 else
9114 rtx countreg2;
9115 rtx label = NULL;
9117 /* In case we don't know anything about the alignment, default to
9118 the library version, since it is usually equally fast and results
9119 in shorter code. */
9120 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9122 end_sequence ();
9123 return 0;
9126 if (TARGET_SINGLE_STRINGOP)
9127 emit_insn (gen_cld ());
9129 countreg2 = gen_reg_rtx (Pmode);
9130 countreg = copy_to_mode_reg (counter_mode, count_exp);
9132 /* We don't use loops to align the destination or to copy parts smaller
9133 than 4 bytes, because gcc is able to optimize such code better (in
9134 the case the destination or the count really is aligned, gcc is often
9135 able to predict the branches) and also it is friendlier to the
9136 hardware branch prediction.
9138 Using loops is beneficial for the generic case, because we can
9139 handle small counts using the loops. Many CPUs (such as Athlon)
9140 have large REP prefix setup costs.
9142 This is quite costly. Maybe we can revisit this decision later or
9143 add some customizability to this code. */
9145 if (count == 0
9146 && align < (TARGET_PENTIUMPRO && (count == 0
9147 || count >= (unsigned int)260)
9148 ? 8 : UNITS_PER_WORD))
9150 label = gen_label_rtx ();
9151 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9152 LEU, 0, counter_mode, 1, label);
9154 if (align <= 1)
9156 rtx label = ix86_expand_aligntest (destreg, 1);
9157 emit_insn (gen_strmovqi (destreg, srcreg));
9158 ix86_adjust_counter (countreg, 1);
9159 emit_label (label);
9160 LABEL_NUSES (label) = 1;
9162 if (align <= 2)
9164 rtx label = ix86_expand_aligntest (destreg, 2);
9165 emit_insn (gen_strmovhi (destreg, srcreg));
9166 ix86_adjust_counter (countreg, 2);
9167 emit_label (label);
9168 LABEL_NUSES (label) = 1;
9170 if (align <= 4
9171 && ((TARGET_PENTIUMPRO && (count == 0
9172 || count >= (unsigned int)260))
9173 || TARGET_64BIT))
9175 rtx label = ix86_expand_aligntest (destreg, 4);
9176 emit_insn (gen_strmovsi (destreg, srcreg));
9177 ix86_adjust_counter (countreg, 4);
9178 emit_label (label);
9179 LABEL_NUSES (label) = 1;
9182 if (!TARGET_SINGLE_STRINGOP)
9183 emit_insn (gen_cld ());
9184 if (TARGET_64BIT)
9186 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9187 GEN_INT (3)));
9188 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9189 destreg, srcreg, countreg2));
9191 else
9193 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9194 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9195 destreg, srcreg, countreg2));
9198 if (label)
9200 emit_label (label);
9201 LABEL_NUSES (label) = 1;
9203 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9204 emit_insn (gen_strmovsi (destreg, srcreg));
9205 if ((align <= 4 || count == 0) && TARGET_64BIT)
9207 rtx label = ix86_expand_aligntest (countreg, 4);
9208 emit_insn (gen_strmovsi (destreg, srcreg));
9209 emit_label (label);
9210 LABEL_NUSES (label) = 1;
9212 if (align > 2 && count != 0 && (count & 2))
9213 emit_insn (gen_strmovhi (destreg, srcreg));
9214 if (align <= 2 || count == 0)
9216 rtx label = ix86_expand_aligntest (countreg, 2);
9217 emit_insn (gen_strmovhi (destreg, srcreg));
9218 emit_label (label);
9219 LABEL_NUSES (label) = 1;
9221 if (align > 1 && count != 0 && (count & 1))
9222 emit_insn (gen_strmovqi (destreg, srcreg));
9223 if (align <= 1 || count == 0)
9225 rtx label = ix86_expand_aligntest (countreg, 1);
9226 emit_insn (gen_strmovqi (destreg, srcreg));
9227 emit_label (label);
9228 LABEL_NUSES (label) = 1;
9232 insns = get_insns ();
9233 end_sequence ();
9235 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9236 emit_insns (insns);
9237 return 1;
9240 /* Expand string clear operation (bzero). Use i386 string operations when
9241 profitable. expand_movstr contains similar code. */
9243 ix86_expand_clrstr (src, count_exp, align_exp)
9244 rtx src, count_exp, align_exp;
9246 rtx destreg, zeroreg, countreg;
9247 enum machine_mode counter_mode;
9248 HOST_WIDE_INT align = 0;
9249 unsigned HOST_WIDE_INT count = 0;
9251 if (GET_CODE (align_exp) == CONST_INT)
9252 align = INTVAL (align_exp);
9254 /* This simple hack avoids all inlining code and simplifies code below. */
9255 if (!TARGET_ALIGN_STRINGOPS)
9256 align = 32;
9258 if (GET_CODE (count_exp) == CONST_INT)
9259 count = INTVAL (count_exp);
9260 /* Figure out the proper mode for the counter. For 32 bits it is
9261 always SImode; for 64 bits use SImode when possible, otherwise DImode.
9262 Set COUNT to the number of bytes to clear when known at compile time. */
9263 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9264 || x86_64_zero_extended_value (count_exp))
9265 counter_mode = SImode;
9266 else
9267 counter_mode = DImode;
9269 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9271 emit_insn (gen_cld ());
9273 /* When optimizing for size, emit a simple rep ; stosb instruction for
9274 counts not divisible by 4. */
9276 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9278 countreg = ix86_zero_extend_to_Pmode (count_exp);
9279 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9280 if (TARGET_64BIT)
9281 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9282 destreg, countreg));
9283 else
9284 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9285 destreg, countreg));
9287 else if (count != 0
9288 && (align >= 8
9289 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9290 || optimize_size || count < (unsigned int)64))
9292 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9293 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9294 if (count & ~(size - 1))
9296 countreg = copy_to_mode_reg (counter_mode,
9297 GEN_INT ((count >> (size == 4 ? 2 : 3))
9298 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9299 countreg = ix86_zero_extend_to_Pmode (countreg);
9300 if (size == 4)
9302 if (TARGET_64BIT)
9303 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9304 destreg, countreg));
9305 else
9306 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9307 destreg, countreg));
9309 else
9310 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9311 destreg, countreg));
9313 if (size == 8 && (count & 0x04))
9314 emit_insn (gen_strsetsi (destreg,
9315 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9316 if (count & 0x02)
9317 emit_insn (gen_strsethi (destreg,
9318 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9319 if (count & 0x01)
9320 emit_insn (gen_strsetqi (destreg,
9321 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9323 else
9325 rtx countreg2;
9326 rtx label = NULL;
9328 /* In case we don't know anything about the alignment, default to
9329 the library version, since it is usually equally fast and results
9330 in shorter code. */
9331 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9332 return 0;
9334 if (TARGET_SINGLE_STRINGOP)
9335 emit_insn (gen_cld ());
9337 countreg2 = gen_reg_rtx (Pmode);
9338 countreg = copy_to_mode_reg (counter_mode, count_exp);
9339 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9341 if (count == 0
9342 && align < (TARGET_PENTIUMPRO && (count == 0
9343 || count >= (unsigned int)260)
9344 ? 8 : UNITS_PER_WORD))
9346 label = gen_label_rtx ();
9347 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9348 LEU, 0, counter_mode, 1, label);
9350 if (align <= 1)
9352 rtx label = ix86_expand_aligntest (destreg, 1);
9353 emit_insn (gen_strsetqi (destreg,
9354 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9355 ix86_adjust_counter (countreg, 1);
9356 emit_label (label);
9357 LABEL_NUSES (label) = 1;
9359 if (align <= 2)
9361 rtx label = ix86_expand_aligntest (destreg, 2);
9362 emit_insn (gen_strsethi (destreg,
9363 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9364 ix86_adjust_counter (countreg, 2);
9365 emit_label (label);
9366 LABEL_NUSES (label) = 1;
9368 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
9369 || count >= (unsigned int)260))
9371 rtx label = ix86_expand_aligntest (destreg, 4);
9372 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9373 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9374 : zeroreg)));
9375 ix86_adjust_counter (countreg, 4);
9376 emit_label (label);
9377 LABEL_NUSES (label) = 1;
9380 if (!TARGET_SINGLE_STRINGOP)
9381 emit_insn (gen_cld ());
9382 if (TARGET_64BIT)
9384 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9385 GEN_INT (3)));
9386 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9387 destreg, countreg2));
9389 else
9391 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9392 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9393 destreg, countreg2));
9396 if (label)
9398 emit_label (label);
9399 LABEL_NUSES (label) = 1;
9401 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9402 emit_insn (gen_strsetsi (destreg,
9403 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9404 if (TARGET_64BIT && (align <= 4 || count == 0))
9406 rtx label = ix86_expand_aligntest (countreg, 4);
9407 emit_insn (gen_strsetsi (destreg,
9408 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9409 emit_label (label);
9410 LABEL_NUSES (label) = 1;
9412 if (align > 2 && count != 0 && (count & 2))
9413 emit_insn (gen_strsethi (destreg,
9414 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9415 if (align <= 2 || count == 0)
9417 rtx label = ix86_expand_aligntest (countreg, 2);
9418 emit_insn (gen_strsethi (destreg,
9419 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9420 emit_label (label);
9421 LABEL_NUSES (label) = 1;
9423 if (align > 1 && count != 0 && (count & 1))
9424 emit_insn (gen_strsetqi (destreg,
9425 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9426 if (align <= 1 || count == 0)
9428 rtx label = ix86_expand_aligntest (countreg, 1);
9429 emit_insn (gen_strsetqi (destreg,
9430 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9431 emit_label (label);
9432 LABEL_NUSES (label) = 1;
9435 return 1;
9437 /* Expand strlen. */
9439 ix86_expand_strlen (out, src, eoschar, align)
9440 rtx out, src, eoschar, align;
9442 rtx addr, scratch1, scratch2, scratch3, scratch4;
9444 /* The generic case of the strlen expander is long. Avoid expanding
9445 it unless TARGET_INLINE_ALL_STRINGOPS. */
9447 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9448 && !TARGET_INLINE_ALL_STRINGOPS
9449 && !optimize_size
9450 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
9451 return 0;
9453 addr = force_reg (Pmode, XEXP (src, 0));
9454 scratch1 = gen_reg_rtx (Pmode);
9456 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9457 && !optimize_size)
9459 /* Well, it seems that some optimizer does not combine a call like
9460 foo (strlen (bar), strlen (bar));
9461 when the move and the subtraction are done here. It does calculate
9462 the length just once when these instructions are done inside of
9463 output_strlen_unroll (). But I think that since &bar[strlen (bar)] is
9464 often used and I use one fewer register for the lifetime of
9465 output_strlen_unroll () this is better. */
9467 emit_move_insn (out, addr);
9469 ix86_expand_strlensi_unroll_1 (out, align);
9471 /* strlensi_unroll_1 returns the address of the zero at the end of
9472 the string, like memchr(), so compute the length by subtracting
9473 the start address. */
9474 if (TARGET_64BIT)
9475 emit_insn (gen_subdi3 (out, out, addr));
9476 else
9477 emit_insn (gen_subsi3 (out, out, addr));
9479 else
9481 scratch2 = gen_reg_rtx (Pmode);
9482 scratch3 = gen_reg_rtx (Pmode);
9483 scratch4 = force_reg (Pmode, constm1_rtx);
9485 emit_move_insn (scratch3, addr);
9486 eoschar = force_reg (QImode, eoschar);
9488 emit_insn (gen_cld ());
9489 if (TARGET_64BIT)
9491 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
9492 align, scratch4, scratch3));
9493 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
9494 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
9496 else
9498 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
9499 align, scratch4, scratch3));
9500 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
9501 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
9504 return 1;
9507 /* Expand the appropriate insns for doing strlen if not just doing
9508 repnz; scasb
9510 out = result, initialized with the start address
9511 align_rtx = alignment of the address.
9512 scratch = scratch register, initialized with the start address when
9513 not aligned, otherwise undefined
9515 This is just the body. It needs the initializations mentioned above
9516 and some address computation at the end. These things are done in i386.md. */
9518 static void
9519 ix86_expand_strlensi_unroll_1 (out, align_rtx)
9520 rtx out, align_rtx;
9522 int align;
9523 rtx tmp;
9524 rtx align_2_label = NULL_RTX;
9525 rtx align_3_label = NULL_RTX;
9526 rtx align_4_label = gen_label_rtx ();
9527 rtx end_0_label = gen_label_rtx ();
9528 rtx mem;
9529 rtx tmpreg = gen_reg_rtx (SImode);
9530 rtx scratch = gen_reg_rtx (SImode);
9532 align = 0;
9533 if (GET_CODE (align_rtx) == CONST_INT)
9534 align = INTVAL (align_rtx);
9536 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
9538 /* Is there a known alignment and is it less than 4? */
9539 if (align < 4)
9541 rtx scratch1 = gen_reg_rtx (Pmode);
9542 emit_move_insn (scratch1, out);
9543 /* Is there a known alignment and is it not 2? */
9544 if (align != 2)
9546 align_3_label = gen_label_rtx (); /* Label when aligned to 3 bytes */
9547 align_2_label = gen_label_rtx (); /* Label when aligned to 2 bytes */
9549 /* Leave just the 3 lower bits. */
9550 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
9551 NULL_RTX, 0, OPTAB_WIDEN);
9553 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9554 Pmode, 1, align_4_label);
9555 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
9556 Pmode, 1, align_2_label);
9557 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
9558 Pmode, 1, align_3_label);
9560 else
9562 /* Since the alignment is 2, we have to check 2 or 0 bytes;
9563 check whether it is aligned to 4 bytes. */
9565 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
9566 NULL_RTX, 0, OPTAB_WIDEN);
9568 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9569 Pmode, 1, align_4_label);
9572 mem = gen_rtx_MEM (QImode, out);
9574 /* Now compare the bytes. */
9576 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
9577 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9578 QImode, 1, end_0_label);
9580 /* Increment the address. */
9581 if (TARGET_64BIT)
9582 emit_insn (gen_adddi3 (out, out, const1_rtx));
9583 else
9584 emit_insn (gen_addsi3 (out, out, const1_rtx));
9586 /* Not needed with an alignment of 2. */
9587 if (align != 2)
9589 emit_label (align_2_label);
9591 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9592 end_0_label);
9594 if (TARGET_64BIT)
9595 emit_insn (gen_adddi3 (out, out, const1_rtx));
9596 else
9597 emit_insn (gen_addsi3 (out, out, const1_rtx));
9599 emit_label (align_3_label);
9602 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9603 end_0_label);
9605 if (TARGET_64BIT)
9606 emit_insn (gen_adddi3 (out, out, const1_rtx));
9607 else
9608 emit_insn (gen_addsi3 (out, out, const1_rtx));
9611 /* Generate a loop to check 4 bytes at a time. It is not a good idea
9612 to align this loop; that only enlarges the program and does not
9613 help speed. */
9614 emit_label (align_4_label);
9616 mem = gen_rtx_MEM (SImode, out);
9617 emit_move_insn (scratch, mem);
9618 if (TARGET_64BIT)
9619 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
9620 else
9621 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
9623 /* This formula yields a nonzero result iff one of the bytes is zero.
9624 This saves three branches inside the loop and many cycles. */
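/* A minimal C sketch of the trick computed below (an illustration, not
   part of the compiler):

     int has_zero_byte (unsigned int x)
     {
       return ((x - 0x01010101U) & ~x & 0x80808080U) != 0;
     }

   Subtracting 0x01010101 sets bit 7 of a byte whenever that byte was 0
   or >= 0x81; ANDing with ~x keeps only bytes whose own bit 7 was clear.
   A borrow can set a surviving bit in a higher byte, but a borrow chain
   can only start at a zero byte, so the final AND with 0x80808080 is
   nonzero exactly when some byte of x is zero.  */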
9626 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
9627 emit_insn (gen_one_cmplsi2 (scratch, scratch));
9628 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
9629 emit_insn (gen_andsi3 (tmpreg, tmpreg,
9630 GEN_INT (trunc_int_for_mode
9631 (0x80808080, SImode))));
9632 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
9633 align_4_label);
9635 if (TARGET_CMOVE)
9637 rtx reg = gen_reg_rtx (SImode);
9638 rtx reg2 = gen_reg_rtx (Pmode);
9639 emit_move_insn (reg, tmpreg);
9640 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
9642 /* If zero is not in the first two bytes, move two bytes forward. */
9643 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9644 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9645 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9646 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
9647 gen_rtx_IF_THEN_ELSE (SImode, tmp,
9648 reg,
9649 tmpreg)));
9650 /* Emit lea manually to avoid clobbering the flags. */
9651 emit_insn (gen_rtx_SET (SImode, reg2,
9652 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
9654 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9655 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9656 emit_insn (gen_rtx_SET (VOIDmode, out,
9657 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
9658 reg2,
9659 out)));
9662 else
9664 rtx end_2_label = gen_label_rtx ();
9665 /* Is zero in the first two bytes? */
9667 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9668 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9669 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
9670 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9671 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
9672 pc_rtx);
9673 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9674 JUMP_LABEL (tmp) = end_2_label;
9676 /* Not in the first two. Move two bytes forward. */
9677 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
9678 if (TARGET_64BIT)
9679 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
9680 else
9681 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
9683 emit_label (end_2_label);
9688 /* Avoid a branch in fixing the byte. */
9688 tmpreg = gen_lowpart (QImode, tmpreg);
9689 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
9690 if (TARGET_64BIT)
9691 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
9692 else
9693 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
9695 emit_label (end_0_label);
9698 /* Clear stack slot assignments remembered from previous functions.
9699 This is called from INIT_EXPANDERS once before RTL is emitted for each
9700 function. */
9702 static void
9703 ix86_init_machine_status (p)
9704 struct function *p;
9706 p->machine = (struct machine_function *)
9707 xcalloc (1, sizeof (struct machine_function));
9710 /* Mark machine specific bits of P for GC. */
9711 static void
9712 ix86_mark_machine_status (p)
9713 struct function *p;
9715 struct machine_function *machine = p->machine;
9716 enum machine_mode mode;
9717 int n;
9719 if (! machine)
9720 return;
9722 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
9723 mode = (enum machine_mode) ((int) mode + 1))
9724 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
9725 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
9728 static void
9729 ix86_free_machine_status (p)
9730 struct function *p;
9732 free (p->machine);
9733 p->machine = NULL;
9736 /* Return a MEM corresponding to a stack slot with mode MODE.
9737 Allocate a new slot if necessary.
9739 The RTL for a function can have several slots available: N is
9740 which slot to use. */
9743 assign_386_stack_local (mode, n)
9744 enum machine_mode mode;
9745 int n;
9747 if (n < 0 || n >= MAX_386_STACK_LOCALS)
9748 abort ();
9750 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9751 ix86_stack_locals[(int) mode][n]
9752 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9754 return ix86_stack_locals[(int) mode][n];
9757 /* Calculate the length of the memory address in the instruction
9758 encoding. Does not include the one-byte modrm, opcode, or prefix. */
9760 static int
9761 memory_address_length (addr)
9762 rtx addr;
9764 struct ix86_address parts;
9765 rtx base, index, disp;
9766 int len;
9768 if (GET_CODE (addr) == PRE_DEC
9769 || GET_CODE (addr) == POST_INC
9770 || GET_CODE (addr) == PRE_MODIFY
9771 || GET_CODE (addr) == POST_MODIFY)
9772 return 0;
9774 if (! ix86_decompose_address (addr, &parts))
9775 abort ();
9777 base = parts.base;
9778 index = parts.index;
9779 disp = parts.disp;
9780 len = 0;
9782 /* Register Indirect. */
9783 if (base && !index && !disp)
9785 /* Special cases: ebp and esp need the two-byte modrm form. */
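/* (An SIB byte is needed to encode (%esp), and (%ebp) with mod 00
   would mean disp32, so a zero 8-bit displacement is used instead;
   either way the address costs one extra byte.)  */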
9786 if (addr == stack_pointer_rtx
9787 || addr == arg_pointer_rtx
9788 || addr == frame_pointer_rtx
9789 || addr == hard_frame_pointer_rtx)
9790 len = 1;
9793 /* Direct Addressing. */
9794 else if (disp && !base && !index)
9795 len = 4;
9797 else
9799 /* Find the length of the displacement constant. */
9800 if (disp)
9802 if (GET_CODE (disp) == CONST_INT
9803 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
9804 len = 1;
9805 else
9806 len = 4;
9809 /* An index requires the two-byte modrm form. */
9810 if (index)
9811 len += 1;
9814 return len;
9817 /* Compute the default value for the "length_immediate" attribute. When
9818 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
9820 ix86_attr_length_immediate_default (insn, shortform)
9821 rtx insn;
9822 int shortform;
9824 int len = 0;
9825 int i;
9826 extract_insn_cached (insn);
9827 for (i = recog_data.n_operands - 1; i >= 0; --i)
9828 if (CONSTANT_P (recog_data.operand[i]))
9830 if (len)
9831 abort ();
9832 if (shortform
9833 && GET_CODE (recog_data.operand[i]) == CONST_INT
9834 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
9835 len = 1;
9836 else
9838 switch (get_attr_mode (insn))
9840 case MODE_QI:
9841 len += 1;
9842 break;
9843 case MODE_HI:
9844 len += 2;
9845 break;
9846 case MODE_SI:
9847 len += 4;
9848 break;
9849 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
9850 case MODE_DI:
9851 len += 4;
9852 break;
9853 default:
9854 fatal_insn ("unknown insn mode", insn);
9858 return len;
9860 /* Compute default value for "length_address" attribute. */
9862 ix86_attr_length_address_default (insn)
9863 rtx insn;
9865 int i;
9866 extract_insn_cached (insn);
9867 for (i = recog_data.n_operands - 1; i >= 0; --i)
9868 if (GET_CODE (recog_data.operand[i]) == MEM)
9870 return memory_address_length (XEXP (recog_data.operand[i], 0));
9871 break;
9873 return 0;
9876 /* Return the maximum number of instructions a cpu can issue. */
9878 static int
9879 ix86_issue_rate ()
9881 switch (ix86_cpu)
9883 case PROCESSOR_PENTIUM:
9884 case PROCESSOR_K6:
9885 return 2;
9887 case PROCESSOR_PENTIUMPRO:
9888 case PROCESSOR_PENTIUM4:
9889 case PROCESSOR_ATHLON:
9890 return 3;
9892 default:
9893 return 1;
9897 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
9898 set by DEP_INSN and nothing else set by DEP_INSN. */
9900 static int
9901 ix86_flags_dependant (insn, dep_insn, insn_type)
9902 rtx insn, dep_insn;
9903 enum attr_type insn_type;
9905 rtx set, set2;
9907 /* Simplify the test for uninteresting insns. */
9908 if (insn_type != TYPE_SETCC
9909 && insn_type != TYPE_ICMOV
9910 && insn_type != TYPE_FCMOV
9911 && insn_type != TYPE_IBR)
9912 return 0;
9914 if ((set = single_set (dep_insn)) != 0)
9916 set = SET_DEST (set);
9917 set2 = NULL_RTX;
9919 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
9920 && XVECLEN (PATTERN (dep_insn), 0) == 2
9921 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
9922 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
9924 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
9925 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
9927 else
9928 return 0;
9930 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
9931 return 0;
9933 /* This test is true if the dependent insn reads the flags but
9934 not any other potentially set register. */
9935 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
9936 return 0;
9938 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
9939 return 0;
9941 return 1;
9944 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
9945 address with operands set by DEP_INSN. */
9947 static int
9948 ix86_agi_dependant (insn, dep_insn, insn_type)
9949 rtx insn, dep_insn;
9950 enum attr_type insn_type;
9952 rtx addr;
9954 if (insn_type == TYPE_LEA
9955 && TARGET_PENTIUM)
9957 addr = PATTERN (insn);
9958 if (GET_CODE (addr) == SET)
9960 else if (GET_CODE (addr) == PARALLEL
9961 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
9962 addr = XVECEXP (addr, 0, 0);
9963 else
9964 abort ();
9965 addr = SET_SRC (addr);
9967 else
9969 int i;
9970 extract_insn_cached (insn);
9971 for (i = recog_data.n_operands - 1; i >= 0; --i)
9972 if (GET_CODE (recog_data.operand[i]) == MEM)
9974 addr = XEXP (recog_data.operand[i], 0);
9975 goto found;
9977 return 0;
9978 found:;
9981 return modified_in_p (addr, dep_insn);
9984 static int
9985 ix86_adjust_cost (insn, link, dep_insn, cost)
9986 rtx insn, link, dep_insn;
9987 int cost;
9989 enum attr_type insn_type, dep_insn_type;
9990 enum attr_memory memory, dep_memory;
9991 rtx set, set2;
9992 int dep_insn_code_number;
9994 /* Anti and output dependencies have zero cost on all CPUs. */
9995 if (REG_NOTE_KIND (link) != 0)
9996 return 0;
9998 dep_insn_code_number = recog_memoized (dep_insn);
10000 /* If we can't recognize the insns, we can't really do anything. */
10001 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
10002 return cost;
10004 insn_type = get_attr_type (insn);
10005 dep_insn_type = get_attr_type (dep_insn);
10007 switch (ix86_cpu)
10009 case PROCESSOR_PENTIUM:
10010 /* Address Generation Interlock adds a cycle of latency. */
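/* (For instance, a load whose address register was computed by the
   immediately preceding insn pays this extra cycle on the Pentium;
   ix86_agi_dependant detects exactly that situation.)  */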
10011 if (ix86_agi_dependant (insn, dep_insn, insn_type))
10012 cost += 1;
10014 /* ??? Compares pair with jump/setcc. */
10015 if (ix86_flags_dependant (insn, dep_insn, insn_type))
10016 cost = 0;
10018 /* Floating point stores require the value to be ready one cycle earlier. */
10019 if (insn_type == TYPE_FMOV
10020 && get_attr_memory (insn) == MEMORY_STORE
10021 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10022 cost += 1;
10023 break;
10025 case PROCESSOR_PENTIUMPRO:
10026 memory = get_attr_memory (insn);
10027 dep_memory = get_attr_memory (dep_insn);
10029 /* Since we can't represent delayed latencies of load+operation,
10030 increase the cost here for non-imov insns. */
10031 if (dep_insn_type != TYPE_IMOV
10032 && dep_insn_type != TYPE_FMOV
10033 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
10034 cost += 1;
10036 /* INT->FP conversion is expensive. */
10037 if (get_attr_fp_int_src (dep_insn))
10038 cost += 5;
10040 /* There is one extra cycle of latency between an FP op and a store. */
10041 if (insn_type == TYPE_FMOV
10042 && (set = single_set (dep_insn)) != NULL_RTX
10043 && (set2 = single_set (insn)) != NULL_RTX
10044 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10045 && GET_CODE (SET_DEST (set2)) == MEM)
10046 cost += 1;
10048 /* Show the ability of the reorder buffer to hide the latency of a load
10049 by executing it in parallel with the previous instruction when the
10050 previous instruction is not needed to compute the address. */
10051 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10052 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10054 /* Claim that moves take one cycle, as the core can issue one load
10055 at a time and the next load can start a cycle later. */
10056 if (dep_insn_type == TYPE_IMOV
10057 || dep_insn_type == TYPE_FMOV)
10058 cost = 1;
10059 else if (cost > 1)
10060 cost--;
10062 break;
10064 case PROCESSOR_K6:
10065 memory = get_attr_memory (insn);
10066 dep_memory = get_attr_memory (dep_insn);
10067 /* The esp dependency is resolved before the instruction is really
10068 finished. */
10069 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10070 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10071 return 1;
10073 /* Since we can't represent delayed latencies of load+operation,
10074 increase the cost here for non-imov insns. */
10075 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10076 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10078 /* INT->FP conversion is expensive. */
10079 if (get_attr_fp_int_src (dep_insn))
10080 cost += 5;
10082 /* Show ability of reorder buffer to hide latency of load by executing
10083 in parallel with previous instruction in case
10084 previous instruction is not needed to compute the address. */
10085 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10086 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10088 /* Claim moves to take one cycle, as core can issue one load
10089 at time and the next load can start cycle later. */
10090 if (dep_insn_type == TYPE_IMOV
10091 || dep_insn_type == TYPE_FMOV)
10092 cost = 1;
10093 else if (cost > 2)
10094 cost -= 2;
10095 else
10096 cost = 1;
10098 break;
10100 case PROCESSOR_ATHLON:
10101 memory = get_attr_memory (insn);
10102 dep_memory = get_attr_memory (dep_insn);
10104 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10106 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10107 cost += 2;
10108 else
10109 cost += 3;
10111 /* Show ability of reorder buffer to hide latency of load by executing
10112 in parallel with previous instruction in case
10113 previous instruction is not needed to compute the address. */
10114 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10115 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10117 /* Claim moves to take one cycle, as core can issue one load
10118 at time and the next load can start cycle later. */
10119 if (dep_insn_type == TYPE_IMOV
10120 || dep_insn_type == TYPE_FMOV)
10121 cost = 0;
10122 else if (cost >= 3)
10123 cost -= 3;
10124 else
10125 cost = 0;
10128 default:
10129 break;
10132 return cost;
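
/* Worked example (editorial, illustrative only): with ix86_cpu ==
   PROCESSOR_PENTIUM, a true dependence between "addl $4, %eax" and
   "movl (%eax), %edx" enters with the scheduler's default cost and
   leaves with one extra cycle added for the AGI, while an anti- or
   output-dependence is reported as cost 0 on every CPU.  */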
static union
{
  struct ppro_sched_data
  {
    rtx decode[3];
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;
static int
ix86_safe_length (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_length (insn);
  else
    return 128;
}

static int
ix86_safe_length_prefix (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_length (insn);
  else
    return 0;
}

static enum attr_memory
ix86_safe_memory (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_memory (insn);
  else
    return MEMORY_UNKNOWN;
}

static enum attr_pent_pair
ix86_safe_pent_pair (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_pent_pair (insn);
  else
    return PENT_PAIR_NP;
}

static enum attr_ppro_uops
ix86_safe_ppro_uops (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_ppro_uops (insn);
  else
    return PPRO_UOPS_MANY;
}
static void
ix86_dump_ppro_packet (dump)
     FILE *dump;
{
  if (ix86_sched_data.ppro.decode[0])
    {
      fprintf (dump, "PPRO packet: %d",
	       INSN_UID (ix86_sched_data.ppro.decode[0]));
      if (ix86_sched_data.ppro.decode[1])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
      if (ix86_sched_data.ppro.decode[2])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
      fputc ('\n', dump);
    }
}
/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ix86_sched_init (dump, sched_verbose, veclen)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int veclen ATTRIBUTE_UNUSED;
{
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
}
/* Shift INSN to SLOT, and shift everything else down.  */

static void
ix86_reorder_insn (insnp, slot)
     rtx *insnp, *slot;
{
  if (insnp != slot)
    {
      rtx insn = *insnp;
      do
	insnp[0] = insnp[1];
      while (++insnp != slot);
      *insnp = insn;
    }
}
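
/* Example (editorial sketch): with the ready list {A, B, C, D}, INSNP
   pointing at A and SLOT at D, the rotation above yields {B, C, D, A};
   A lands in the slot and everything in between shifts down one
   position.  */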
/* Find an instruction with the given pairability and a minimal number of
   cycles lost to the fact that the CPU waits for both pipelines to finish
   before reading the next instructions.  Also take care that the two
   instructions together cannot exceed 7 bytes.  */

static rtx *
ix86_pent_find_pair (e_ready, ready, type, first)
     rtx *e_ready;
     rtx *ready;
     enum attr_pent_pair type;
     rtx first;
{
  int mincycles, cycles;
  enum attr_pent_pair tmp;
  enum attr_memory memory;
  rtx *insnp, *bestinsnp = NULL;

  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
    return NULL;

  memory = ix86_safe_memory (first);
  cycles = result_ready_cost (first);
  mincycles = INT_MAX;

  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
      {
	enum attr_memory second_memory;
	int secondcycles, currentcycles;

	second_memory = ix86_safe_memory (*insnp);
	secondcycles = result_ready_cost (*insnp);
	currentcycles = abs (cycles - secondcycles);

	if (secondcycles >= 1 && cycles >= 1)
	  {
	    /* Two read/modify/write instructions together take two
	       cycles longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
	      currentcycles += 2;

	    /* A read/modify/write instruction followed by a read/modify
	       instruction takes one cycle longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
		&& tmp != PENT_PAIR_UV
		&& ix86_safe_pent_pair (first) != PENT_PAIR_UV)
	      currentcycles += 1;
	  }
	if (currentcycles < mincycles)
	  bestinsnp = insnp, mincycles = currentcycles;
      }

  return bestinsnp;
}
/* Subroutines of ix86_sched_reorder.  */

static void
ix86_sched_reorder_pentium (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  enum attr_pent_pair pair1, pair2;
  rtx *insnp;

  /* This wouldn't be necessary if Haifa knew that static insn ordering
     determines which pipe an insn is issued to.  So we have to make
     some minor rearrangements.  */

  pair1 = ix86_safe_pent_pair (*e_ready);

  /* If the first insn is non-pairable, let it be.  */
  if (pair1 == PENT_PAIR_NP)
    return;

  pair2 = PENT_PAIR_NP;
  insnp = 0;

  /* If the first insn is UV or PV pairable, search for a PU
     insn to go with.  */
  if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
    {
      insnp = ix86_pent_find_pair (e_ready - 1, ready,
				   PENT_PAIR_PU, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PU;
    }

  /* If the first insn is PU or UV pairable, search for a PV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP
      && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
    {
      insnp = ix86_pent_find_pair (e_ready - 1, ready,
				   PENT_PAIR_PV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PV;
    }

  /* If the first insn is pairable, search for a UV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP)
    {
      insnp = ix86_pent_find_pair (e_ready - 1, ready,
				   PENT_PAIR_UV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_UV;
    }

  if (pair2 == PENT_PAIR_NP)
    return;

  /* Found something!  Decide if we need to swap the order.  */
  if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
      || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
	  && ix86_safe_memory (*e_ready) == MEMORY_BOTH
	  && ix86_safe_memory (*insnp) == MEMORY_LOAD))
    ix86_reorder_insn (insnp, e_ready);
  else
    ix86_reorder_insn (insnp, e_ready - 1);
}
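
/* Editorial note (illustrative): the swap decision above moves the
   found partner to the very end of the ready list -- i.e. makes it
   issue first -- whenever it is PU (U-pipe only) or the current head
   is PV (V-pipe only), since the U-pipe member of a Pentium pair must
   issue first; otherwise the partner is slotted just before the
   head.  */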
static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from the last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready >= e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}
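
/* Editorial note: this mirrors the PPro/PII "4-1-1" decode template --
   decoder 0 accepts a multi-uop insn while decoders 1 and 2 take only
   single-uop insns -- so one FEW/MANY-uop insn is placed first and the
   two remaining slots are back-filled with ONE-uop insns.  */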
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */
static int
ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *n_readyp;
     int clock_var ATTRIBUTE_UNUSED;
{
  int n_ready = *n_readyp;
  rtx *e_ready = ready + n_ready - 1;

  if (n_ready < 2)
    goto out;

  switch (ix86_cpu)
    {
    default:
      break;

    case PROCESSOR_PENTIUM:
      ix86_sched_reorder_pentium (ready, e_ready);
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

 out:
  return ix86_issue_rate ();
}
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}
/* Walk through INSNS and look for MEM references whose address is DSTREG or
   SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
   appropriate.  */

void
ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
     rtx insns;
     rtx dstref, srcref, dstreg, srcreg;
{
  rtx insn;

  for (insn = insns; insn != 0; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
				 dstreg, srcreg);
}
/* Subroutine of above to actually do the updating by recursively walking
   the rtx.  */

static void
ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx x;
     rtx dstref, srcref, dstreg, srcreg;
{
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);
  int i, j;

  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
    {
      if (*format_ptr == 'e')
	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
				   dstreg, srcreg);
      else if (*format_ptr == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
				     dstreg, srcreg);
    }
}
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (exp, align)
     tree exp;
     int align;
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
	   && align < 256)
    return 256;

  return align;
}
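
/* Example (editorial): a 'double' constant (DFmode REAL_CST) that would
   ordinarily get only 32-bit alignment is bumped to 64 bits here so an
   FPU load never straddles an alignment boundary, and strings of 31 or
   more bytes are given 256-bit alignment.  */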
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (type, align)
     tree type;
     int align;
{
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (type, align)
     tree type;
     int align;
{
  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
x86_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      GEN_INT (trunc_int_for_mode (0xb9, QImode)));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      GEN_INT (trunc_int_for_mode (0xe9, QImode)));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load the address using the shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but the kernel does
	 not use trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load the static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      GEN_INT (trunc_int_for_mode (0xba49, HImode)));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump to r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      GEN_INT (trunc_int_for_mode (0xff49, HImode)));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
		      GEN_INT (trunc_int_for_mode (0xe3, QImode)));
      offset += 3;
      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }
}
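
/* Byte layout of the trampolines built above (editorial summary, for
   reference only):

   32-bit (10 bytes):
       0:  b9 <cxt:4>       movl $CXT, %ecx
       5:  e9 <disp:4>      jmp  FNADDR     (pc-relative from offset 10)

   64-bit (19 or 23 bytes, little-endian stores of the opcode pairs):
       41 bb <imm32>  or  49 bb <imm64>     mov FNADDR -> %r11
       49 ba <imm64>                        mov CXT    -> %r10
       49 ff e3                             jmp *%r11  */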
#define def_builtin(MASK, NAME, TYPE, CODE)				\
do {									\
  if ((MASK) & target_flags)						\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL);	\
} while (0)
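
/* Usage example (editorial; a hypothetical direct call mirroring the
   table-driven registrations below):

       def_builtin (MASK_SSE, "__builtin_ia32_addps",
		    v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   registers the builtin only when the corresponding -m option has put
   MASK_SSE into target_flags.  */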
struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
};
static const struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  */
  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },

  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { MASK_SSE, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
};
static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
};
void
ix86_init_builtins ()
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
void
ix86_init_mmx_sse_builtins ()
{
  const struct builtin_description * d;
  size_t i;
  tree endlink = void_list_node;

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE,
						 V4SF_type_node,
						 endlink)));
  tree v4si_ftype_v4sf_v4sf
    = build_function_type (V4SI_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE,
						 V4SF_type_node,
						 endlink)));
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      endlink));
  tree int_ftype_v8qi
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));
  tree int_ftype_v2si
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree v2si_ftype_int
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, integer_type_node,
				      endlink));
  tree v4sf_ftype_v4sf_int
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v4sf_ftype_v4sf_v2si
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree int_ftype_v4hi_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_int_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v4hi_ftype_v2si_v2si
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v4hi_ftype_v8qi_v8qi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v2si_ftype_v4hi_v4hi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_di
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree v2si_ftype_v2si_di
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree void_ftype_void
    = build_function_type (void_type_node, endlink);
  tree void_ftype_pchar_int
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pchar_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree void_ftype_unsigned
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, unsigned_type_node,
				      endlink));
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, endlink);
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree ti_ftype_void
    = build_function_type (intTI_type_node, endlink);
  tree v2si_ftype_v4sf
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      endlink));
  /* Loads/stores.  */
  tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
				  tree_cons (NULL_TREE, V8QI_type_node,
					     tree_cons (NULL_TREE,
							pchar_type_node,
							endlink)));
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type (void_type_node, maskmovq_args);
  tree v4sf_ftype_pfloat
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, pfloat_type_node,
				      endlink));
  tree v4sf_ftype_float
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, float_type_node,
				      endlink));
  tree v4sf_ftype_float_float_float_float
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, float_type_node,
				      tree_cons (NULL_TREE, float_type_node,
						 tree_cons (NULL_TREE,
							    float_type_node,
							    tree_cons (NULL_TREE,
								       float_type_node,
								       endlink)))));
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, pv2si_type_node,
						 endlink)));
  tree void_ftype_pv2si_v4sf
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pv2si_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 endlink)));
  tree void_ftype_pfloat_v4sf
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pfloat_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 endlink)));
  tree void_ftype_pdi_di
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pdi_type_node,
				      tree_cons (NULL_TREE,
						 long_long_unsigned_type_node,
						 endlink)));
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      endlink));

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 endlink)));
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree ti_ftype_ti_ti
    = build_function_type (intTI_type_node,
			   tree_cons (NULL_TREE, intTI_type_node,
				      tree_cons (NULL_TREE, intTI_type_node,
						 endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
				      tree_cons (NULL_TREE,
						 long_long_unsigned_type_node,
						 endlink)));

  tree v2si_ftype_v2sf
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SF_type_node,
				      endlink));
  tree v2sf_ftype_v2si
    = build_function_type (V2SF_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree v2si_ftype_v2si
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree v2sf_ftype_v2sf
    = build_function_type (V2SF_type_node,
			   tree_cons (NULL_TREE, V2SF_type_node,
				      endlink));
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type (V2SF_type_node,
			   tree_cons (NULL_TREE, V2SF_type_node,
				      tree_cons (NULL_TREE,
						 V2SF_type_node,
						 endlink)));
  tree v2si_ftype_v2sf_v2sf
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SF_type_node,
				      tree_cons (NULL_TREE,
						 V2SF_type_node,
						 endlink)));

  tree void_ftype_pchar
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pchar_type_node,
				      endlink));
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case TImode:
	  type = ti_ftype_ti_ti;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  abort ();
	}

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
	  || d->icode == CODE_FOR_maskncmpv4sf3
	  || d->icode == CODE_FOR_vmmaskcmpv4sf3
	  || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	type = v4si_ftype_v4sf_v4sf;

      def_builtin (d->mask, d->name, type, d->code);
    }

  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
  def_builtin (MASK_MMX, "__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
    def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
  def_builtin (MASK_3DNOW, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar, IX86_BUILTIN_PREFETCH_3DNOW);
  def_builtin (MASK_3DNOW, "__builtin_ia32_prefetchw", void_ftype_pchar, IX86_BUILTIN_PREFETCHW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  /* Composite intrinsics.  */
  def_builtin (MASK_SSE, "__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
  def_builtin (MASK_SSE, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
  def_builtin (MASK_SSE, "__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
  def_builtin (MASK_SSE, "__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
  def_builtin (MASK_SSE, "__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (x, mode)
     rtx x;
     enum machine_mode mode;
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
			      : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrti (mode == TImode ? x
			      : gen_rtx_SUBREG (TImode, x, 0)));
  return x;
}
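
/* Editorial note: the point of the helper above is robustness after
   front-end errors -- a const0_rtx arriving where a vector was expected
   is replaced by a freshly cleared MMX (DImode) or SSE (TImode)
   register instead of letting builtin expansion crash.  */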
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
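
/* Example (editorial): expanding __builtin_ia32_addps routes through
   here with ICODE == CODE_FOR_addv4sf3; both V4SFmode operands are
   forced into registers when the operand predicates reject them, and
   TARGET is reused only if it already has the insn's result mode.  */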
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (icode, arglist, shuffle)
     enum insn_code icode;
     tree arglist;
     int shuffle;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);
  if (shuffle >= 0)
    emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}
/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (icode, arglist, target, do_load)
     enum insn_code icode;
     tree arglist;
     rtx target;
     int do_load;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
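
/* Passing OP0 twice above is deliberate: the vm* patterns used here
   compute the operation in element 0 only and take a second source
   operand whose remaining elements are merged unchanged into the result,
   matching the semantics of the scalar sqrtss/rsqrtss/rcpss
   instructions.  */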
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  gen_rtx_REG (CCmode, FLAGS_REG),
					  const0_rtx)));

  return target;
}
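
/* The expansion above first zeroes a fresh SImode register and then sets
   only its low byte through STRICT_LOW_PART from the flags produced by
   the comi insn; zeroing first is what guarantees the builtin yields a
   clean SImode 0 or 1 even though setcc writes just eight bits.  */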
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2, arg3;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_M_FROM_INT:
      target = gen_reg_rtx (DImode);
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
      return target;

    case IX86_BUILTIN_M_TO_INT:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      op0 = copy_to_mode_reg (DImode, op0);
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
      return target;
    case IX86_BUILTIN_PEXTRW:
      icode = CODE_FOR_mmx_pextrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_PINSRW:
      icode = CODE_FOR_mmx_pinsrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_MASKMOVQ:
      icode = CODE_FOR_mmx_maskmovq;
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      /* Check each operand against its own predicate; OP0 was previously
	 checked against the operand 1 predicate, which was inconsistent
	 with the use of MODE0.  */
      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;
    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
      icode = (fcode == IX86_BUILTIN_LOADHPS
	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS
	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;
    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);
    case IX86_BUILTIN_PREFETCH:
      icode = CODE_FOR_prefetch;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}

      op0 = copy_to_mode_reg (Pmode, op0);
      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_SHUFPS:
      icode = CODE_FOR_sse_shufps;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_PSHUFW:
      icode = CODE_FOR_mmx_pshufw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PREFETCH_3DNOW:
      icode = CODE_FOR_prefetch_3dnow;
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
      if (! pat)
	return NULL_RTX;
      emit_insn (pat);
      return NULL_RTX;

    case IX86_BUILTIN_PREFETCHW:
      icode = CODE_FOR_prefetchw;
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
      if (! pat)
	return NULL_RTX;
      emit_insn (pat);
      return NULL_RTX;

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
      /* Composite intrinsics.  */
    case IX86_BUILTIN_SETPS1:
      target = assign_386_stack_local (SFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, SFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_loadss (op0, adjust_address (target, V4SFmode, 0)));
      emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
      return op0;
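      /* That is: SETPS1 spills the scalar to a stack slot, loads it into
	 element 0 with loadss, and broadcasts it with a shufps selector
	 of 0 -- the splat used by the _mm_set_ps1 style intrinsic,
	 assuming the usual <xmmintrin.h> mapping.  */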
    case IX86_BUILTIN_SETPS:
      target = assign_386_stack_local (V4SFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
      emit_move_insn (adjust_address (target, SFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, SFmode, 4),
		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, SFmode, 8),
		      expand_expr (arg2, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, SFmode, 12),
		      expand_expr (arg3, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_movaps (op0, target));
      return op0;

    case IX86_BUILTIN_CLRPS:
      target = gen_reg_rtx (TImode);
      emit_insn (gen_sse_clrti (target));
      return target;

    case IX86_BUILTIN_LOADRPS:
      target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
					 gen_reg_rtx (V4SFmode), 1);
      emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
      return target;

    case IX86_BUILTIN_LOADPS1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
					 gen_reg_rtx (V4SFmode), 1);
      emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPS1:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
    case IX86_BUILTIN_STORERPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;
    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */

rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (gen_rtx_SET (VOIDmode,
				  gen_rtx_MEM (DImode,
					       gen_rtx_PRE_DEC (DImode,
								stack_pointer_rtx)),
				  operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (gen_rtx_SET (VOIDmode,
				    gen_rtx_MEM (SImode,
						 gen_rtx_PRE_DEC (Pmode,
								  stack_pointer_rtx)),
				    operands[1]));
	    emit_insn (gen_rtx_SET (VOIDmode,
				    gen_rtx_MEM (SImode,
						 gen_rtx_PRE_DEC (Pmode,
								  stack_pointer_rtx)),
				    operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (gen_rtx_SET (VOIDmode,
				  gen_rtx_MEM (GET_MODE (operand),
					       gen_rtx_PRE_DEC (SImode,
								stack_pointer_rtx)),
				  operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
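
/* Note on the TARGET_RED_ZONE path above: no stack adjustment is emitted
   there because the x86-64 ABI reserves a 128 byte "red zone" below the
   stack pointer that leaf code may scribble on freely, so the slot at
   sp - RED_ZONE_SIZE is already safe to use and nothing needs to be
   deallocated afterwards.  */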
/* Free operand from the memory.  */

void
ix86_free_from_memory (mode)
     enum machine_mode mode;
{
  if (!TARGET_64BIT || !TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
	size = 2;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be
	 converted to a pop or add instruction if registers are
	 available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}
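
/* The adjustment emitted above is just

     (set (reg sp) (plus (reg sp) (const_int SIZE)))

   in Pmode, which the backend can output as an lea that leaves the flags
   untouched; as the comment says, peephole2 may later turn it into a pop
   or an add when that is cheaper and a register or the flags are free.  */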
/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

enum reg_class
ix86_preferred_reload_class (x, class)
     rtx x;
     enum reg_class class;
{
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
	return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
	{
	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
	  if (MAYBE_SSE_CLASS_P (class))
	    return (reg_class_subset_p (class, GENERAL_REGS)
		    ? GENERAL_REGS : FLOAT_REGS);
	  else
	    return class;
	}
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
	return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We avoid
   this by never combining those units in a single alternative in the
   machine description.  Ensure that this constraint holds to avoid
   unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do
   not enforce these sanity checks.  */

int
ix86_secondary_memory_needed (class1, class2, mode, strict)
     enum reg_class class1, class2;
     enum machine_mode mode;
     int strict;
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
	abort ();
      else
	return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
	  || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
	      && (mode) != SImode)
	  || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
	      && (mode) != SImode));
}
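
/* Example: a DFmode copy between FLOAT_REGS and GENERAL_REGS makes
   FLOAT_CLASS_P differ for the two classes, so this returns nonzero and
   reload will route the copy through a stack slot rather than attempting
   a direct register-to-register move the hardware cannot do.  */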
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same
   as TO; on some machines it is expensive to move between registers if
   they are not general registers.  */

int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute the cost of the store
     followed by the load.  When copying from a general purpose register
     we may emit multiple stores followed by a single load, causing a
     memory size mismatch stall.  Count this as an arbitrarily high cost
     of 20.  */
  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int add_cost = 0;
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	add_cost = 20;
      return (MEMORY_MOVE_COST (mode, class1, 0)
	      + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
    }
  /* Moves between SSE/MMX and the integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */

int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* Flags and only flags can hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return VALID_SSE_REG_MODE (mode);
  if (MMX_REGNO_P (regno))
    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
  /* We handle both integers and floats in the general purpose registers.
     In the future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care with QImode values: they can live in non-QI regs, but there
     they cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Also model the increased cost of moving QImode registers in non
   Q_REGS classes.  */

int
ix86_memory_move_cost (mode, class, in)
     enum machine_mode mode;
     enum reg_class class;
     int in;
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	case TFmode:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
	return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
		: ix86_cost->movzbl_load);
      else
	return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
		: ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute the number of 32 bit moves needed.  TFmode is moved as
	 XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
	      * (int) GET_MODE_SIZE (mode) / 4);
    }
}
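
/* Worked example: an 8 byte value in a general register class falls into
   the default case above, so its cost is int_load[2] (or int_store[2])
   scaled by GET_MODE_SIZE / 4 -- i.e. it is priced as two 32 bit moves.  */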
#ifdef DO_GLOBAL_CTORS_BODY
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif