1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
45 #include "langhooks.h"
/* Provide a fallback definition when no target header has defined a
   stack-probe limit.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
51 /* Processor costs (relative to an add) */
53 struct processor_costs size_cost
= { /* costs for tunning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of loading integer registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 2, /* cost of FADD and FSUB insns. */
88 2, /* cost of FMUL instruction. */
89 2, /* cost of FDIV instruction. */
90 2, /* cost of FABS instruction. */
91 2, /* cost of FCHS instruction. */
92 2, /* cost of FSQRT instruction. */
95 /* Processor costs (relative to an add) */
97 struct processor_costs i386_cost
= { /* 386 specific costs */
98 1, /* cost of an add instruction */
99 1, /* cost of a lea instruction */
100 3, /* variable shift costs */
101 2, /* constant shift costs */
102 6, /* cost of starting a multiply */
103 1, /* cost of multiply per each bit set */
104 23, /* cost of a divide/mod */
105 3, /* cost of movsx */
106 2, /* cost of movzx */
107 15, /* "large" insn */
109 4, /* cost for loading QImode using movzbl */
110 {2, 4, 2}, /* cost of loading integer registers
111 in QImode, HImode and SImode.
112 Relative to reg-reg move (2). */
113 {2, 4, 2}, /* cost of storing integer registers */
114 2, /* cost of reg,reg fld/fst */
115 {8, 8, 8}, /* cost of loading fp registers
116 in SFmode, DFmode and XFmode */
117 {8, 8, 8}, /* cost of loading integer registers */
118 2, /* cost of moving MMX register */
119 {4, 8}, /* cost of loading MMX registers
120 in SImode and DImode */
121 {4, 8}, /* cost of storing MMX registers
122 in SImode and DImode */
123 2, /* cost of moving SSE register */
124 {4, 8, 16}, /* cost of loading SSE registers
125 in SImode, DImode and TImode */
126 {4, 8, 16}, /* cost of storing SSE registers
127 in SImode, DImode and TImode */
128 3, /* MMX or SSE register to integer */
129 0, /* size of prefetch block */
130 0, /* number of parallel prefetches */
131 23, /* cost of FADD and FSUB insns. */
132 27, /* cost of FMUL instruction. */
133 88, /* cost of FDIV instruction. */
134 22, /* cost of FABS instruction. */
135 24, /* cost of FCHS instruction. */
136 122, /* cost of FSQRT instruction. */
140 struct processor_costs i486_cost
= { /* 486 specific costs */
141 1, /* cost of an add instruction */
142 1, /* cost of a lea instruction */
143 3, /* variable shift costs */
144 2, /* constant shift costs */
145 12, /* cost of starting a multiply */
146 1, /* cost of multiply per each bit set */
147 40, /* cost of a divide/mod */
148 3, /* cost of movsx */
149 2, /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of loading integer registers */
161 2, /* cost of moving MMX register */
162 {4, 8}, /* cost of loading MMX registers
163 in SImode and DImode */
164 {4, 8}, /* cost of storing MMX registers
165 in SImode and DImode */
166 2, /* cost of moving SSE register */
167 {4, 8, 16}, /* cost of loading SSE registers
168 in SImode, DImode and TImode */
169 {4, 8, 16}, /* cost of storing SSE registers
170 in SImode, DImode and TImode */
171 3, /* MMX or SSE register to integer */
172 0, /* size of prefetch block */
173 0, /* number of parallel prefetches */
174 8, /* cost of FADD and FSUB insns. */
175 16, /* cost of FMUL instruction. */
176 73, /* cost of FDIV instruction. */
177 3, /* cost of FABS instruction. */
178 3, /* cost of FCHS instruction. */
179 83, /* cost of FSQRT instruction. */
183 struct processor_costs pentium_cost
= {
184 1, /* cost of an add instruction */
185 1, /* cost of a lea instruction */
186 4, /* variable shift costs */
187 1, /* constant shift costs */
188 11, /* cost of starting a multiply */
189 0, /* cost of multiply per each bit set */
190 25, /* cost of a divide/mod */
191 3, /* cost of movsx */
192 2, /* cost of movzx */
193 8, /* "large" insn */
195 6, /* cost for loading QImode using movzbl */
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
198 Relative to reg-reg move (2). */
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {2, 2, 6}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
203 {4, 4, 6}, /* cost of loading integer registers */
204 8, /* cost of moving MMX register */
205 {8, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {8, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
217 3, /* cost of FADD and FSUB insns. */
218 3, /* cost of FMUL instruction. */
219 39, /* cost of FDIV instruction. */
220 1, /* cost of FABS instruction. */
221 1, /* cost of FCHS instruction. */
222 70, /* cost of FSQRT instruction. */
226 struct processor_costs pentiumpro_cost
= {
227 1, /* cost of an add instruction */
228 1, /* cost of a lea instruction */
229 1, /* variable shift costs */
230 1, /* constant shift costs */
231 4, /* cost of starting a multiply */
232 0, /* cost of multiply per each bit set */
233 17, /* cost of a divide/mod */
234 1, /* cost of movsx */
235 1, /* cost of movzx */
236 8, /* "large" insn */
238 2, /* cost for loading QImode using movzbl */
239 {4, 4, 4}, /* cost of loading integer registers
240 in QImode, HImode and SImode.
241 Relative to reg-reg move (2). */
242 {2, 2, 2}, /* cost of storing integer registers */
243 2, /* cost of reg,reg fld/fst */
244 {2, 2, 6}, /* cost of loading fp registers
245 in SFmode, DFmode and XFmode */
246 {4, 4, 6}, /* cost of loading integer registers */
247 2, /* cost of moving MMX register */
248 {2, 2}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {2, 2}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {2, 2, 8}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {2, 2, 8}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 32, /* size of prefetch block */
259 6, /* number of parallel prefetches */
260 3, /* cost of FADD and FSUB insns. */
261 5, /* cost of FMUL instruction. */
262 56, /* cost of FDIV instruction. */
263 2, /* cost of FABS instruction. */
264 2, /* cost of FCHS instruction. */
265 56, /* cost of FSQRT instruction. */
269 struct processor_costs k6_cost
= {
270 1, /* cost of an add instruction */
271 2, /* cost of a lea instruction */
272 1, /* variable shift costs */
273 1, /* constant shift costs */
274 3, /* cost of starting a multiply */
275 0, /* cost of multiply per each bit set */
276 18, /* cost of a divide/mod */
277 2, /* cost of movsx */
278 2, /* cost of movzx */
279 8, /* "large" insn */
281 3, /* cost for loading QImode using movzbl */
282 {4, 5, 4}, /* cost of loading integer registers
283 in QImode, HImode and SImode.
284 Relative to reg-reg move (2). */
285 {2, 3, 2}, /* cost of storing integer registers */
286 4, /* cost of reg,reg fld/fst */
287 {6, 6, 6}, /* cost of loading fp registers
288 in SFmode, DFmode and XFmode */
289 {4, 4, 4}, /* cost of loading integer registers */
290 2, /* cost of moving MMX register */
291 {2, 2}, /* cost of loading MMX registers
292 in SImode and DImode */
293 {2, 2}, /* cost of storing MMX registers
294 in SImode and DImode */
295 2, /* cost of moving SSE register */
296 {2, 2, 8}, /* cost of loading SSE registers
297 in SImode, DImode and TImode */
298 {2, 2, 8}, /* cost of storing SSE registers
299 in SImode, DImode and TImode */
300 6, /* MMX or SSE register to integer */
301 32, /* size of prefetch block */
302 1, /* number of parallel prefetches */
303 2, /* cost of FADD and FSUB insns. */
304 2, /* cost of FMUL instruction. */
305 56, /* cost of FDIV instruction. */
306 2, /* cost of FABS instruction. */
307 2, /* cost of FCHS instruction. */
308 56, /* cost of FSQRT instruction. */
312 struct processor_costs athlon_cost
= {
313 1, /* cost of an add instruction */
314 2, /* cost of a lea instruction */
315 1, /* variable shift costs */
316 1, /* constant shift costs */
317 5, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 42, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 8, /* "large" insn */
324 4, /* cost for loading QImode using movzbl */
325 {3, 4, 3}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {3, 4, 3}, /* cost of storing integer registers */
329 4, /* cost of reg,reg fld/fst */
330 {4, 4, 12}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {6, 6, 8}, /* cost of loading integer registers */
333 2, /* cost of moving MMX register */
334 {4, 4}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {4, 4}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {4, 4, 6}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {4, 4, 5}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 5, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 4, /* cost of FADD and FSUB insns. */
347 4, /* cost of FMUL instruction. */
348 24, /* cost of FDIV instruction. */
349 2, /* cost of FABS instruction. */
350 2, /* cost of FCHS instruction. */
351 35, /* cost of FSQRT instruction. */
355 struct processor_costs pentium4_cost
= {
356 1, /* cost of an add instruction */
357 1, /* cost of a lea instruction */
358 8, /* variable shift costs */
359 8, /* constant shift costs */
360 30, /* cost of starting a multiply */
361 0, /* cost of multiply per each bit set */
362 112, /* cost of a divide/mod */
363 1, /* cost of movsx */
364 1, /* cost of movzx */
365 16, /* "large" insn */
367 2, /* cost for loading QImode using movzbl */
368 {4, 5, 4}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 3, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
375 {4, 4, 6}, /* cost of loading integer registers */
376 2, /* cost of moving MMX register */
377 {2, 2}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {2, 2}, /* cost of storing MMX registers
380 in SImode and DImode */
381 12, /* cost of moving SSE register */
382 {12, 12, 12}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {2, 2, 8}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 10, /* MMX or SSE register to integer */
387 64, /* size of prefetch block */
388 6, /* number of parallel prefetches */
389 5, /* cost of FADD and FSUB insns. */
390 7, /* cost of FMUL instruction. */
391 43, /* cost of FDIV instruction. */
392 2, /* cost of FABS instruction. */
393 2, /* cost of FCHS instruction. */
394 43, /* cost of FSQRT instruction. */
397 const struct processor_costs
*ix86_cost
= &pentium_cost
;
399 /* Processor feature/optimization bitmasks. */
400 #define m_386 (1<<PROCESSOR_I386)
401 #define m_486 (1<<PROCESSOR_I486)
402 #define m_PENT (1<<PROCESSOR_PENTIUM)
403 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
404 #define m_K6 (1<<PROCESSOR_K6)
405 #define m_ATHLON (1<<PROCESSOR_ATHLON)
406 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
408 const int x86_use_leave
= m_386
| m_K6
| m_ATHLON
;
409 const int x86_push_memory
= m_386
| m_K6
| m_ATHLON
| m_PENT4
;
410 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
411 const int x86_movx
= m_ATHLON
| m_PPRO
| m_PENT4
/* m_386 | m_K6 */;
412 const int x86_double_with_add
= ~m_386
;
413 const int x86_use_bit_test
= m_386
;
414 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON
| m_K6
;
415 const int x86_cmove
= m_PPRO
| m_ATHLON
| m_PENT4
;
416 const int x86_3dnow_a
= m_ATHLON
;
417 const int x86_deep_branch
= m_PPRO
| m_K6
| m_ATHLON
| m_PENT4
;
418 const int x86_branch_hints
= m_PENT4
;
419 const int x86_use_sahf
= m_PPRO
| m_K6
| m_PENT4
;
420 const int x86_partial_reg_stall
= m_PPRO
;
421 const int x86_use_loop
= m_K6
;
422 const int x86_use_fiop
= ~(m_PPRO
| m_ATHLON
| m_PENT
);
423 const int x86_use_mov0
= m_K6
;
424 const int x86_use_cltd
= ~(m_PENT
| m_K6
);
425 const int x86_read_modify_write
= ~m_PENT
;
426 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
427 const int x86_split_long_moves
= m_PPRO
;
428 const int x86_promote_QImode
= m_K6
| m_PENT
| m_386
| m_486
| m_ATHLON
;
429 const int x86_fast_prefix
= ~(m_PENT
| m_486
| m_386
);
430 const int x86_single_stringop
= m_386
| m_PENT4
;
431 const int x86_qimode_math
= ~(0);
432 const int x86_promote_qi_regs
= 0;
433 const int x86_himode_math
= ~(m_PPRO
);
434 const int x86_promote_hi_regs
= m_PPRO
;
435 const int x86_sub_esp_4
= m_ATHLON
| m_PPRO
| m_PENT4
;
436 const int x86_sub_esp_8
= m_ATHLON
| m_PPRO
| m_386
| m_486
| m_PENT4
;
437 const int x86_add_esp_4
= m_ATHLON
| m_K6
| m_PENT4
;
438 const int x86_add_esp_8
= m_ATHLON
| m_PPRO
| m_K6
| m_386
| m_486
| m_PENT4
;
439 const int x86_integer_DFmode_moves
= ~(m_ATHLON
| m_PENT4
| m_PPRO
);
440 const int x86_partial_reg_dependency
= m_ATHLON
| m_PENT4
;
441 const int x86_memory_mismatch_stall
= m_ATHLON
| m_PENT4
;
442 const int x86_accumulate_outgoing_args
= m_ATHLON
| m_PENT4
| m_PPRO
;
443 const int x86_prologue_using_move
= m_ATHLON
| m_PENT4
| m_PPRO
;
444 const int x86_epilogue_using_move
= m_ATHLON
| m_PENT4
| m_PPRO
;
445 const int x86_decompose_lea
= m_PENT4
;
446 const int x86_shift1
= ~m_486
;
447 const int x86_arch_always_fancy_math_387
= m_PENT
| m_PPRO
| m_ATHLON
| m_PENT4
;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30
/* Set by prologue expander and used by epilogue expander to determine
   the style used (see FAST_PROLOGUE_INSN_COUNT).  */
static int use_fast_prologue_epilogue;
458 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
459 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
460 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
461 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
463 /* Array of the smallest class containing reg number REGNO, indexed by
464 REGNO. Used by REGNO_REG_CLASS in i386.h. */
466 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
469 AREG
, DREG
, CREG
, BREG
,
471 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
473 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
474 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
477 /* flags, fpsr, dirflag, frame */
478 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
479 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
481 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
483 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
484 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
485 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
489 /* The "default" register map used in 32bit mode. */
491 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
493 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
494 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
495 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
496 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
497 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
498 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
499 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
502 static int const x86_64_int_parameter_registers
[6] =
504 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
505 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
508 static int const x86_64_int_return_registers
[4] =
510 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
513 /* The "default" register map used in 64bit mode. */
514 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
516 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
517 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
518 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
519 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
520 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
521 8,9,10,11,12,13,14,15, /* extended integer registers */
522 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
525 /* Define the register numbers to be used in Dwarf debugging information.
526 The SVR4 reference port C compiler uses the following register numbers
527 in its Dwarf output code:
528 0 for %eax (gcc regno = 0)
529 1 for %ecx (gcc regno = 2)
530 2 for %edx (gcc regno = 1)
531 3 for %ebx (gcc regno = 3)
532 4 for %esp (gcc regno = 7)
533 5 for %ebp (gcc regno = 6)
534 6 for %esi (gcc regno = 4)
535 7 for %edi (gcc regno = 5)
536 The following three DWARF register numbers are never generated by
537 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
538 believes these numbers have these meanings.
539 8 for %eip (no gcc equivalent)
540 9 for %eflags (gcc regno = 17)
541 10 for %trapno (no gcc equivalent)
542 It is not at all clear how we should number the FP stack registers
543 for the x86 architecture. If the version of SDB on x86/svr4 were
544 a bit less brain dead with respect to floating-point then we would
545 have a precedent to follow with respect to DWARF register numbers
546 for x86 FP registers, but the SDB on x86/svr4 is so completely
547 broken with respect to FP registers that it is hardly worth thinking
548 of it as something to strive for compatibility with.
549 The version of x86/svr4 SDB I have at the moment does (partially)
550 seem to believe that DWARF register number 11 is associated with
551 the x86 register %st(0), but that's about all. Higher DWARF
552 register numbers don't seem to be associated with anything in
553 particular, and even for DWARF regno 11, SDB only seems to under-
554 stand that it should say that a variable lives in %st(0) (when
555 asked via an `=' command) if we said it was in DWARF regno 11,
556 but SDB still prints garbage when asked for the value of the
557 variable in question (via a `/' command).
558 (Also note that the labels SDB prints for various FP stack regs
559 when doing an `x' command are all wrong.)
560 Note that these problems generally don't affect the native SVR4
561 C compiler because it doesn't allow the use of -O with -g and
562 because when it is *not* optimizing, it allocates a memory
563 location for each floating-point variable, and the memory
564 location is what gets described in the DWARF AT_location
565 attribute for the variable in question.
566 Regardless of the severe mental illness of the x86/svr4 SDB, we
567 do something sensible here and we use the following DWARF
568 register numbers. Note that these are all stack-top-relative
570 11 for %st(0) (gcc regno = 8)
571 12 for %st(1) (gcc regno = 9)
572 13 for %st(2) (gcc regno = 10)
573 14 for %st(3) (gcc regno = 11)
574 15 for %st(4) (gcc regno = 12)
575 16 for %st(5) (gcc regno = 13)
576 17 for %st(6) (gcc regno = 14)
577 18 for %st(7) (gcc regno = 15)
579 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
581 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
582 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
583 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
584 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
585 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
586 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
587 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
590 /* Test and compare insns in i386.md store the information needed to
591 generate branch and scc insns here. */
593 rtx ix86_compare_op0
= NULL_RTX
;
594 rtx ix86_compare_op1
= NULL_RTX
;
/* The encoding characters for the four TLS models present in ELF,
   indexed by enum tls_model (index 0 is unused).  */
static char const tls_model_chars[] = " GLil";
/* Number of stack slots cached per machine mode.  */
#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
604 /* Define the structure for the machine field in struct function. */
605 struct machine_function
GTY(())
607 rtx stack_locals
[(int) MAX_MACHINE_MODE
][MAX_386_STACK_LOCALS
];
608 const char *some_ld_name
;
609 int save_varrargs_registers
;
610 int accesses_prev_frame
;
613 #define ix86_stack_locals (cfun->machine->stack_locals)
614 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
616 /* Structure describing stack frame layout.
617 Stack grows downward:
623 saved frame pointer if frame_pointer_needed
624 <- HARD_FRAME_POINTER
630 > to_allocate <- FRAME_POINTER
642 int outgoing_arguments_size
;
645 HOST_WIDE_INT to_allocate
;
646 /* The offsets relative to ARG_POINTER. */
647 HOST_WIDE_INT frame_pointer_offset
;
648 HOST_WIDE_INT hard_frame_pointer_offset
;
649 HOST_WIDE_INT stack_pointer_offset
;
652 /* Used to enable/disable debugging features. */
653 const char *ix86_debug_arg_string
, *ix86_debug_addr_string
;
654 /* Code model option as passed by user. */
655 const char *ix86_cmodel_string
;
657 enum cmodel ix86_cmodel
;
659 const char *ix86_asm_string
;
660 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
662 const char *ix86_tls_dialect_string
;
663 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
665 /* Which unit we are generating floating point math for. */
666 enum fpmath_unit ix86_fpmath
;
668 /* Which cpu are we scheduling for. */
669 enum processor_type ix86_cpu
;
670 /* Which instruction set architecture to use. */
671 enum processor_type ix86_arch
;
673 /* Strings to hold which cpu and instruction set architecture to use. */
674 const char *ix86_cpu_string
; /* for -mcpu=<xxx> */
675 const char *ix86_arch_string
; /* for -march=<xxx> */
676 const char *ix86_fpmath_string
; /* for -mfpmath=<xxx> */
678 /* # of registers to use to pass arguments. */
679 const char *ix86_regparm_string
;
681 /* true if sse prefetch instruction is not NOOP. */
682 int x86_prefetch_sse
;
684 /* ix86_regparm_string as a number */
687 /* Alignment to use for loops and jumps: */
689 /* Power of two alignment for loops. */
690 const char *ix86_align_loops_string
;
692 /* Power of two alignment for non-loop jumps. */
693 const char *ix86_align_jumps_string
;
695 /* Power of two alignment for stack boundary in bytes. */
696 const char *ix86_preferred_stack_boundary_string
;
698 /* Preferred alignment for stack boundary in bits. */
699 int ix86_preferred_stack_boundary
;
701 /* Values 1-5: see jump.c */
702 int ix86_branch_cost
;
703 const char *ix86_branch_cost_string
;
705 /* Power of two alignment for functions. */
706 const char *ix86_align_funcs_string
;
708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
709 static char internal_label_prefix
[16];
710 static int internal_label_prefix_len
;
712 static int local_symbolic_operand
PARAMS ((rtx
, enum machine_mode
));
713 static int tls_symbolic_operand_1
PARAMS ((rtx
, enum tls_model
));
714 static void output_pic_addr_const
PARAMS ((FILE *, rtx
, int));
715 static void put_condition_code
PARAMS ((enum rtx_code
, enum machine_mode
,
717 static const char *get_some_local_dynamic_name
PARAMS ((void));
718 static int get_some_local_dynamic_name_1
PARAMS ((rtx
*, void *));
719 static rtx maybe_get_pool_constant
PARAMS ((rtx
));
720 static rtx ix86_expand_int_compare
PARAMS ((enum rtx_code
, rtx
, rtx
));
721 static enum rtx_code ix86_prepare_fp_compare_args
PARAMS ((enum rtx_code
,
723 static rtx get_thread_pointer
PARAMS ((void));
724 static void get_pc_thunk_name
PARAMS ((char [32], unsigned int));
725 static rtx gen_push
PARAMS ((rtx
));
726 static int memory_address_length
PARAMS ((rtx addr
));
727 static int ix86_flags_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
728 static int ix86_agi_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
729 static enum attr_ppro_uops ix86_safe_ppro_uops
PARAMS ((rtx
));
730 static void ix86_dump_ppro_packet
PARAMS ((FILE *));
731 static void ix86_reorder_insn
PARAMS ((rtx
*, rtx
*));
732 static struct machine_function
* ix86_init_machine_status
PARAMS ((void));
733 static int ix86_split_to_parts
PARAMS ((rtx
, rtx
*, enum machine_mode
));
734 static int ix86_nsaved_regs
PARAMS ((void));
735 static void ix86_emit_save_regs
PARAMS ((void));
736 static void ix86_emit_save_regs_using_mov
PARAMS ((rtx
, HOST_WIDE_INT
));
737 static void ix86_emit_restore_regs_using_mov
PARAMS ((rtx
, int, int));
738 static void ix86_output_function_epilogue
PARAMS ((FILE *, HOST_WIDE_INT
));
739 static void ix86_set_move_mem_attrs_1
PARAMS ((rtx
, rtx
, rtx
, rtx
, rtx
));
740 static void ix86_sched_reorder_ppro
PARAMS ((rtx
*, rtx
*));
741 static HOST_WIDE_INT ix86_GOT_alias_set
PARAMS ((void));
742 static void ix86_adjust_counter
PARAMS ((rtx
, HOST_WIDE_INT
));
743 static rtx ix86_expand_aligntest
PARAMS ((rtx
, int));
744 static void ix86_expand_strlensi_unroll_1
PARAMS ((rtx
, rtx
));
745 static int ix86_issue_rate
PARAMS ((void));
746 static int ix86_adjust_cost
PARAMS ((rtx
, rtx
, rtx
, int));
747 static void ix86_sched_init
PARAMS ((FILE *, int, int));
748 static int ix86_sched_reorder
PARAMS ((FILE *, int, rtx
*, int *, int));
749 static int ix86_variable_issue
PARAMS ((FILE *, int, rtx
, int));
750 static int ia32_use_dfa_pipeline_interface
PARAMS ((void));
751 static int ia32_multipass_dfa_lookahead
PARAMS ((void));
752 static void ix86_init_mmx_sse_builtins
PARAMS ((void));
753 static rtx x86_this_parameter
PARAMS ((tree
));
754 static void x86_output_mi_thunk
PARAMS ((FILE *, tree
, HOST_WIDE_INT
,
755 HOST_WIDE_INT
, tree
));
756 static bool x86_can_output_mi_thunk
PARAMS ((tree
, HOST_WIDE_INT
,
757 HOST_WIDE_INT
, tree
));
761 rtx base
, index
, disp
;
765 static int ix86_decompose_address
PARAMS ((rtx
, struct ix86_address
*));
766 static bool ix86_cannot_force_const_mem
PARAMS ((rtx
));
768 static void ix86_encode_section_info
PARAMS ((tree
, int)) ATTRIBUTE_UNUSED
;
769 static const char *ix86_strip_name_encoding
PARAMS ((const char *))
772 struct builtin_description
;
773 static rtx ix86_expand_sse_comi
PARAMS ((const struct builtin_description
*,
775 static rtx ix86_expand_sse_compare
PARAMS ((const struct builtin_description
*,
777 static rtx ix86_expand_unop1_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
778 static rtx ix86_expand_unop_builtin
PARAMS ((enum insn_code
, tree
, rtx
, int));
779 static rtx ix86_expand_binop_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
780 static rtx ix86_expand_store_builtin
PARAMS ((enum insn_code
, tree
));
781 static rtx safe_vector_operand
PARAMS ((rtx
, enum machine_mode
));
782 static enum rtx_code ix86_fp_compare_code_to_integer
PARAMS ((enum rtx_code
));
783 static void ix86_fp_comparison_codes
PARAMS ((enum rtx_code code
,
787 static rtx ix86_expand_fp_compare
PARAMS ((enum rtx_code
, rtx
, rtx
, rtx
,
789 static int ix86_fp_comparison_arithmetics_cost
PARAMS ((enum rtx_code code
));
790 static int ix86_fp_comparison_fcomi_cost
PARAMS ((enum rtx_code code
));
791 static int ix86_fp_comparison_sahf_cost
PARAMS ((enum rtx_code code
));
792 static int ix86_fp_comparison_cost
PARAMS ((enum rtx_code code
));
793 static unsigned int ix86_select_alt_pic_regnum
PARAMS ((void));
794 static int ix86_save_reg
PARAMS ((unsigned int, int));
795 static void ix86_compute_frame_layout
PARAMS ((struct ix86_frame
*));
796 static int ix86_comp_type_attributes
PARAMS ((tree
, tree
));
797 static int ix86_fntype_regparm
PARAMS ((tree
));
798 const struct attribute_spec ix86_attribute_table
[];
799 static tree ix86_handle_cdecl_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
800 static tree ix86_handle_regparm_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
801 static bool ix86_ms_bitfield_layout_p
PARAMS ((tree
));
802 static int ix86_value_regno
PARAMS ((enum machine_mode
));
803 static bool contains_128bit_aligned_vector_p
PARAMS ((tree
));
805 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
806 static void ix86_svr3_asm_out_constructor
PARAMS ((rtx
, int));
809 /* Register class used for passing given 64bit part of the argument.
810 These represent classes as documented by the PS ABI, with the exception
811 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
812 use SF or DFmode move instead of DImode to avoid reformating penalties.
814 Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves
815 whenever possible (upper half does contain padding).
817 enum x86_64_reg_class
820 X86_64_INTEGER_CLASS
,
821 X86_64_INTEGERSI_CLASS
,
830 static const char * const x86_64_reg_class_name
[] =
831 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
833 #define MAX_CLASSES 4
834 static int classify_argument
PARAMS ((enum machine_mode
, tree
,
835 enum x86_64_reg_class
[MAX_CLASSES
],
837 static int examine_argument
PARAMS ((enum machine_mode
, tree
, int, int *,
839 static rtx construct_container
PARAMS ((enum machine_mode
, tree
, int, int, int,
841 static enum x86_64_reg_class merge_classes
PARAMS ((enum x86_64_reg_class
,
842 enum x86_64_reg_class
));
/* Initialize the GCC target structure.
   NOTE(review): the recovered listing lost the closing #endif of the
   DLLIMPORT section and (apparently) #ifdef guards around the ASM_QUAD
   and TLS sections; they have been restored -- confirm against the
   original source.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
   ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
   ia32_multipass_dfa_lookahead

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
909 #undef TARGET_MS_BITFIELD_LAYOUT_P
910 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
912 #undef TARGET_ASM_OUTPUT_MI_THUNK
913 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
914 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
915 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
917 struct gcc_target targetm
= TARGET_INITIALIZER
;
919 /* The svr4 ABI for the i386 says that records and unions are returned
921 #ifndef DEFAULT_PCC_STRUCT_RETURN
922 #define DEFAULT_PCC_STRUCT_RETURN 1
925 /* Sometimes certain combinations of command options do not make
926 sense on a particular target machine. You can define a macro
927 `OVERRIDE_OPTIONS' to take account of this. This macro, if
928 defined, is executed once just after all the command options have
931 Don't use this macro to turn on various extra optimizations for
932 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
938 /* Comes from final.c -- no real reason to change it. */
939 #define MAX_CODE_ALIGN 16
943 const struct processor_costs
*cost
; /* Processor costs */
944 const int target_enable
; /* Target flags to enable. */
945 const int target_disable
; /* Target flags to disable. */
946 const int align_loop
; /* Default alignments. */
947 const int align_loop_max_skip
;
948 const int align_jump
;
949 const int align_jump_max_skip
;
950 const int align_func
;
951 const int branch_cost
;
953 const processor_target_table
[PROCESSOR_max
] =
955 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4, 1},
956 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16, 1},
957 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16, 1},
958 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16, 1},
959 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32, 1},
960 {&athlon_cost
, 0, 0, 16, 7, 64, 7, 16, 1},
961 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0, 1}
964 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
967 const char *const name
; /* processor name or nickname. */
968 const enum processor_type processor
;
974 PTA_PREFETCH_SSE
= 8,
979 const processor_alias_table
[] =
981 {"i386", PROCESSOR_I386
, 0},
982 {"i486", PROCESSOR_I486
, 0},
983 {"i586", PROCESSOR_PENTIUM
, 0},
984 {"pentium", PROCESSOR_PENTIUM
, 0},
985 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
986 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
987 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
988 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
989 {"i686", PROCESSOR_PENTIUMPRO
, 0},
990 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
991 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
992 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
993 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
|
994 PTA_MMX
| PTA_PREFETCH_SSE
},
995 {"k6", PROCESSOR_K6
, PTA_MMX
},
996 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
997 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
998 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1000 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1001 | PTA_3DNOW
| PTA_3DNOW_A
},
1002 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1003 | PTA_3DNOW_A
| PTA_SSE
},
1004 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1005 | PTA_3DNOW_A
| PTA_SSE
},
1006 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1007 | PTA_3DNOW_A
| PTA_SSE
},
1010 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1012 /* By default our XFmode is the 80-bit extended format.  If we
1013 use TFmode instead, it's also the 80-bit format, but with padding.  */
1014 real_format_for_mode
[XFmode
- QFmode
] = &ieee_extended_intel_96_format
;
1015 real_format_for_mode
[TFmode
- QFmode
] = &ieee_extended_intel_128_format
;
1017 /* Set the default values for switches whose default depends on TARGET_64BIT
1018 in case they weren't overwritten by command line options.  */
1021 if (flag_omit_frame_pointer
== 2)
1022 flag_omit_frame_pointer
= 1;
1023 if (flag_asynchronous_unwind_tables
== 2)
1024 flag_asynchronous_unwind_tables
= 1;
1025 if (flag_pcc_struct_return
== 2)
1026 flag_pcc_struct_return
= 0;
1030 if (flag_omit_frame_pointer
== 2)
1031 flag_omit_frame_pointer
= 0;
1032 if (flag_asynchronous_unwind_tables
== 2)
1033 flag_asynchronous_unwind_tables
= 0;
1034 if (flag_pcc_struct_return
== 2)
1035 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1038 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1039 SUBTARGET_OVERRIDE_OPTIONS
;
1042 if (!ix86_cpu_string
&& ix86_arch_string
)
1043 ix86_cpu_string
= ix86_arch_string
;
1044 if (!ix86_cpu_string
)
1045 ix86_cpu_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1046 if (!ix86_arch_string
)
1047 ix86_arch_string
= TARGET_64BIT
? "athlon-4" : "i386";
1049 if (ix86_cmodel_string
!= 0)
1051 if (!strcmp (ix86_cmodel_string
, "small"))
1052 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1054 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
1055 else if (!strcmp (ix86_cmodel_string
, "32"))
1056 ix86_cmodel
= CM_32
;
1057 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1058 ix86_cmodel
= CM_KERNEL
;
1059 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
1060 ix86_cmodel
= CM_MEDIUM
;
1061 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
1062 ix86_cmodel
= CM_LARGE
;
1064 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1068 ix86_cmodel
= CM_32
;
1070 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1072 if (ix86_asm_string
!= 0)
1074 if (!strcmp (ix86_asm_string
, "intel"))
1075 ix86_asm_dialect
= ASM_INTEL
;
1076 else if (!strcmp (ix86_asm_string
, "att"))
1077 ix86_asm_dialect
= ASM_ATT
;
1079 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1081 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1082 error ("code model `%s' not supported in the %s bit mode",
1083 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1084 if (ix86_cmodel
== CM_LARGE
)
1085 sorry ("code model `large' not supported yet");
1086 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1087 sorry ("%i-bit mode not compiled in",
1088 (target_flags
& MASK_64BIT
) ? 64 : 32);
1090 for (i
= 0; i
< pta_size
; i
++)
1091 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1093 ix86_arch
= processor_alias_table
[i
].processor
;
1094 /* Default cpu tuning to the architecture. */
1095 ix86_cpu
= ix86_arch
;
1096 if (processor_alias_table
[i
].flags
& PTA_MMX
1097 && !(target_flags_explicit
& MASK_MMX
))
1098 target_flags
|= MASK_MMX
;
1099 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1100 && !(target_flags_explicit
& MASK_3DNOW
))
1101 target_flags
|= MASK_3DNOW
;
1102 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1103 && !(target_flags_explicit
& MASK_3DNOW_A
))
1104 target_flags
|= MASK_3DNOW_A
;
1105 if (processor_alias_table
[i
].flags
& PTA_SSE
1106 && !(target_flags_explicit
& MASK_SSE
))
1107 target_flags
|= MASK_SSE
;
1108 if (processor_alias_table
[i
].flags
& PTA_SSE2
1109 && !(target_flags_explicit
& MASK_SSE2
))
1110 target_flags
|= MASK_SSE2
;
1111 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1112 x86_prefetch_sse
= true;
1117 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1119 for (i
= 0; i
< pta_size
; i
++)
1120 if (! strcmp (ix86_cpu_string
, processor_alias_table
[i
].name
))
1122 ix86_cpu
= processor_alias_table
[i
].processor
;
1125 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1126 x86_prefetch_sse
= true;
1128 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string
);
1131 ix86_cost
= &size_cost
;
1133 ix86_cost
= processor_target_table
[ix86_cpu
].cost
;
1134 target_flags
|= processor_target_table
[ix86_cpu
].target_enable
;
1135 target_flags
&= ~processor_target_table
[ix86_cpu
].target_disable
;
1137 /* Arrange to set up i386_stack_locals for all functions. */
1138 init_machine_status
= ix86_init_machine_status
;
1140 /* Validate -mregparm= value. */
1141 if (ix86_regparm_string
)
1143 i
= atoi (ix86_regparm_string
);
1144 if (i
< 0 || i
> REGPARM_MAX
)
1145 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1151 ix86_regparm
= REGPARM_MAX
;
1153 /* If the user has provided any of the -malign-* options,
1154 warn and use that value only if -falign-* is not set.
1155 Remove this code in GCC 3.2 or later. */
1156 if (ix86_align_loops_string
)
1158 warning ("-malign-loops is obsolete, use -falign-loops");
1159 if (align_loops
== 0)
1161 i
= atoi (ix86_align_loops_string
);
1162 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1163 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1165 align_loops
= 1 << i
;
1169 if (ix86_align_jumps_string
)
1171 warning ("-malign-jumps is obsolete, use -falign-jumps");
1172 if (align_jumps
== 0)
1174 i
= atoi (ix86_align_jumps_string
);
1175 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1176 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1178 align_jumps
= 1 << i
;
1182 if (ix86_align_funcs_string
)
1184 warning ("-malign-functions is obsolete, use -falign-functions");
1185 if (align_functions
== 0)
1187 i
= atoi (ix86_align_funcs_string
);
1188 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1189 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1191 align_functions
= 1 << i
;
1195 /* Default align_* from the processor table. */
1196 if (align_loops
== 0)
1198 align_loops
= processor_target_table
[ix86_cpu
].align_loop
;
1199 align_loops_max_skip
= processor_target_table
[ix86_cpu
].align_loop_max_skip
;
1201 if (align_jumps
== 0)
1203 align_jumps
= processor_target_table
[ix86_cpu
].align_jump
;
1204 align_jumps_max_skip
= processor_target_table
[ix86_cpu
].align_jump_max_skip
;
1206 if (align_functions
== 0)
1208 align_functions
= processor_target_table
[ix86_cpu
].align_func
;
1211 /* Validate -mpreferred-stack-boundary= value, or provide default.
1212 The default of 128 bits is for Pentium III's SSE __m128, but we
1213 don't want additional code to keep the stack aligned when
1214 optimizing for code size. */
1215 ix86_preferred_stack_boundary
= (optimize_size
1216 ? TARGET_64BIT
? 128 : 32
1218 if (ix86_preferred_stack_boundary_string
)
1220 i
= atoi (ix86_preferred_stack_boundary_string
);
1221 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
1222 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1223 TARGET_64BIT
? 4 : 2);
1225 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
1228 /* Validate -mbranch-cost= value, or provide default. */
1229 ix86_branch_cost
= processor_target_table
[ix86_cpu
].branch_cost
;
1230 if (ix86_branch_cost_string
)
1232 i
= atoi (ix86_branch_cost_string
);
1234 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1236 ix86_branch_cost
= i
;
1239 if (ix86_tls_dialect_string
)
1241 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1242 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1243 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1244 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1246 error ("bad value (%s) for -mtls-dialect= switch",
1247 ix86_tls_dialect_string
);
1250 /* Keep nonleaf frame pointers. */
1251 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1252 flag_omit_frame_pointer
= 1;
1254 /* If we're doing fast math, we don't care about comparison order
1255 wrt NaNs. This lets us use a shorter comparison sequence. */
1256 if (flag_unsafe_math_optimizations
)
1257 target_flags
&= ~MASK_IEEE_FP
;
1259 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1260 since the insns won't need emulation. */
1261 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1262 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1266 if (TARGET_ALIGN_DOUBLE
)
1267 error ("-malign-double makes no sense in the 64bit mode");
1269 error ("-mrtd calling convention not supported in the 64bit mode");
1270 /* Enable by default the SSE and MMX builtins. */
1271 target_flags
|= (MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
);
1272 ix86_fpmath
= FPMATH_SSE
;
1275 ix86_fpmath
= FPMATH_387
;
1277 if (ix86_fpmath_string
!= 0)
1279 if (! strcmp (ix86_fpmath_string
, "387"))
1280 ix86_fpmath
= FPMATH_387
;
1281 else if (! strcmp (ix86_fpmath_string
, "sse"))
1285 warning ("SSE instruction set disabled, using 387 arithmetics");
1286 ix86_fpmath
= FPMATH_387
;
1289 ix86_fpmath
= FPMATH_SSE
;
1291 else if (! strcmp (ix86_fpmath_string
, "387,sse")
1292 || ! strcmp (ix86_fpmath_string
, "sse,387"))
1296 warning ("SSE instruction set disabled, using 387 arithmetics");
1297 ix86_fpmath
= FPMATH_387
;
1299 else if (!TARGET_80387
)
1301 warning ("387 instruction set disabled, using SSE arithmetics");
1302 ix86_fpmath
= FPMATH_SSE
;
1305 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
1308 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
1311 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1315 target_flags
|= MASK_MMX
;
1316 x86_prefetch_sse
= true;
1319 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1322 target_flags
|= MASK_MMX
;
1323 /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1324 extensions it adds. */
1325 if (x86_3dnow_a
& (1 << ix86_arch
))
1326 target_flags
|= MASK_3DNOW_A
;
1328 if ((x86_accumulate_outgoing_args
& CPUMASK
)
1329 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
1331 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1333 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1336 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1337 p
= strchr (internal_label_prefix
, 'X');
1338 internal_label_prefix_len
= p
- internal_label_prefix
;
1344 optimization_options (level
, size
)
1346 int size ATTRIBUTE_UNUSED
;
1348 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1349 make the problem with not enough registers even worse. */
1350 #ifdef INSN_SCHEDULING
1352 flag_schedule_insns
= 0;
1355 /* The default values of these switches depend on the TARGET_64BIT
1356 that is not known at this moment. Mark these values with 2 and
1357 let user the to override these. In case there is no command line option
1358 specifying them, we will set the defaults in override_options. */
1360 flag_omit_frame_pointer
= 2;
1361 flag_pcc_struct_return
= 2;
1362 flag_asynchronous_unwind_tables
= 2;
1365 /* Table of valid machine attributes. */
1366 const struct attribute_spec ix86_attribute_table
[] =
1368 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1369 /* Stdcall attribute says callee is responsible for popping arguments
1370 if they are not variable. */
1371 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1372 /* Fastcall attribute says callee is responsible for popping arguments
1373 if they are not variable. */
1374 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1375 /* Cdecl attribute says the callee is a normal C declaration */
1376 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1377 /* Regparm attribute specifies how many integer arguments are to be
1378 passed in registers. */
1379 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute
},
1380 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1381 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1382 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1383 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
1385 { NULL
, 0, 0, false, false, false, NULL
}
1388 /* Handle a "cdecl" or "stdcall" attribute;
1389 arguments as in struct attribute_spec.handler. */
1391 ix86_handle_cdecl_attribute (node
, name
, args
, flags
, no_add_attrs
)
1394 tree args ATTRIBUTE_UNUSED
;
1395 int flags ATTRIBUTE_UNUSED
;
1398 if (TREE_CODE (*node
) != FUNCTION_TYPE
1399 && TREE_CODE (*node
) != METHOD_TYPE
1400 && TREE_CODE (*node
) != FIELD_DECL
1401 && TREE_CODE (*node
) != TYPE_DECL
)
1403 warning ("`%s' attribute only applies to functions",
1404 IDENTIFIER_POINTER (name
));
1405 *no_add_attrs
= true;
1410 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
1411 *no_add_attrs
= true;
1417 /* Handle a "regparm" attribute;
1418 arguments as in struct attribute_spec.handler. */
1420 ix86_handle_regparm_attribute (node
, name
, args
, flags
, no_add_attrs
)
1424 int flags ATTRIBUTE_UNUSED
;
1427 if (TREE_CODE (*node
) != FUNCTION_TYPE
1428 && TREE_CODE (*node
) != METHOD_TYPE
1429 && TREE_CODE (*node
) != FIELD_DECL
1430 && TREE_CODE (*node
) != TYPE_DECL
)
1432 warning ("`%s' attribute only applies to functions",
1433 IDENTIFIER_POINTER (name
));
1434 *no_add_attrs
= true;
1440 cst
= TREE_VALUE (args
);
1441 if (TREE_CODE (cst
) != INTEGER_CST
)
1443 warning ("`%s' attribute requires an integer constant argument",
1444 IDENTIFIER_POINTER (name
));
1445 *no_add_attrs
= true;
1447 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
1449 warning ("argument to `%s' attribute larger than %d",
1450 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
1451 *no_add_attrs
= true;
1458 /* Return 0 if the attributes for two types are incompatible, 1 if they
1459 are compatible, and 2 if they are nearly compatible (which causes a
1460 warning to be generated). */
1463 ix86_comp_type_attributes (type1
, type2
)
1467 /* Check for mismatch of non-default calling convention. */
1468 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
1470 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
1473 /* Check for mismatched fastcall types */
1474 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
1475 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
1478 /* Check for mismatched return types (cdecl vs stdcall). */
1479 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
1480 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
1485 /* Return the regparm value for a fuctio with the indicated TYPE. */
1488 ix86_fntype_regparm (type
)
1493 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
1495 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1497 return ix86_regparm
;
1500 /* Return true if EAX is live at the start of the function. Used by
1501 ix86_expand_prologue to determine if we need special help before
1502 calling allocate_stack_worker. */
1505 ix86_eax_live_at_start_p (void)
1507 /* Cheat. Don't bother working forward from ix86_function_regparm
1508 to the function type to whether an actual argument is located in
1509 eax. Instead just look at cfg info, which is still close enough
1510 to correct at this point. This gives false positives for broken
1511 functions that might use uninitialized data that happens to be
1512 allocated in eax, but who cares? */
1513 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->global_live_at_end
, 0);
1516 /* Value is the number of bytes of arguments automatically
1517 popped when returning from a subroutine call.
1518 FUNDECL is the declaration node of the function (as a tree),
1519 FUNTYPE is the data type of the function (as a tree),
1520 or for a library call it is an identifier node for the subroutine name.
1521 SIZE is the number of bytes of arguments passed on the stack.
1523 On the 80386, the RTD insn may be used to pop them if the number
1524 of args is fixed, but if the number is variable then the caller
1525 must pop them all. RTD can't be used for library calls now
1526 because the library is compiled with the Unix compiler.
1527 Use of RTD is a selectable option, since it is incompatible with
1528 standard Unix calling sequences. If the option is not selected,
1529 the caller must always pop the args.
1531 The attribute stdcall is equivalent to RTD on a per module basis. */
1534 ix86_return_pops_args (fundecl
, funtype
, size
)
1539 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
1541 /* Cdecl functions override -mrtd, and never pop the stack. */
1542 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
1544 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1545 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
1546 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
1550 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1551 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
1552 == void_type_node
)))
1556 /* Lose any fake structure return argument if it is passed on the stack. */
1557 if (aggregate_value_p (TREE_TYPE (funtype
))
1560 int nregs
= ix86_fntype_regparm (funtype
);
1563 return GET_MODE_SIZE (Pmode
);
1569 /* Argument support functions. */
1571 /* Return true when register may be used to pass function parameters. */
1573 ix86_function_arg_regno_p (regno
)
1578 return (regno
< REGPARM_MAX
1579 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
1580 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
1582 /* RAX is used as hidden argument to va_arg functions. */
1585 for (i
= 0; i
< REGPARM_MAX
; i
++)
1586 if (regno
== x86_64_int_parameter_registers
[i
])
1591 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1592 for a call to a function whose data type is FNTYPE.
1593 For a library call, FNTYPE is 0. */
1596 init_cumulative_args (cum
, fntype
, libname
)
1597 CUMULATIVE_ARGS
*cum
; /* Argument info to initialize */
1598 tree fntype
; /* tree ptr for function decl */
1599 rtx libname
; /* SYMBOL_REF of library name or 0 */
1601 static CUMULATIVE_ARGS zero_cum
;
1602 tree param
, next_param
;
1604 if (TARGET_DEBUG_ARG
)
1606 fprintf (stderr
, "\ninit_cumulative_args (");
1608 fprintf (stderr
, "fntype code = %s, ret code = %s",
1609 tree_code_name
[(int) TREE_CODE (fntype
)],
1610 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
1612 fprintf (stderr
, "no fntype");
1615 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
1620 /* Set up the number of registers to use for passing arguments. */
1621 cum
->nregs
= ix86_regparm
;
1622 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1623 if (fntype
&& !TARGET_64BIT
)
1625 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype
));
1628 cum
->nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1630 cum
->maybe_vaarg
= false;
1632 /* Use ecx and edx registers if function has fastcall attribute */
1633 if (fntype
&& !TARGET_64BIT
)
1635 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
1642 /* Determine if this function has variable arguments. This is
1643 indicated by the last argument being 'void_type_mode' if there
1644 are no variable arguments. If there are variable arguments, then
1645 we won't pass anything in registers */
1649 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
1650 param
!= 0; param
= next_param
)
1652 next_param
= TREE_CHAIN (param
);
1653 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
1660 cum
->maybe_vaarg
= true;
1664 if ((!fntype
&& !libname
)
1665 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
1666 cum
->maybe_vaarg
= 1;
1668 if (TARGET_DEBUG_ARG
)
1669 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
1674 /* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
1675 of this code is to classify each 8bytes of incoming argument by the register
1676 class and assign registers accordingly. */
1678 /* Return the union class of CLASS1 and CLASS2.
1679 See the x86-64 PS ABI for details. */
1681 static enum x86_64_reg_class
1682 merge_classes (class1
, class2
)
1683 enum x86_64_reg_class class1
, class2
;
1685 /* Rule #1: If both classes are equal, this is the resulting class. */
1686 if (class1
== class2
)
1689 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1691 if (class1
== X86_64_NO_CLASS
)
1693 if (class2
== X86_64_NO_CLASS
)
1696 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1697 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
1698 return X86_64_MEMORY_CLASS
;
1700 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1701 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
1702 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
1703 return X86_64_INTEGERSI_CLASS
;
1704 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
1705 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
1706 return X86_64_INTEGER_CLASS
;
1708 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1709 if (class1
== X86_64_X87_CLASS
|| class1
== X86_64_X87UP_CLASS
1710 || class2
== X86_64_X87_CLASS
|| class2
== X86_64_X87UP_CLASS
)
1711 return X86_64_MEMORY_CLASS
;
1713 /* Rule #6: Otherwise class SSE is used. */
1714 return X86_64_SSE_CLASS
;
1717 /* Classify the argument of type TYPE and mode MODE.
1718 CLASSES will be filled by the register class used to pass each word
1719 of the operand. The number of words is returned. In case the parameter
1720 should be passed in memory, 0 is returned. As a special case for zero
1721 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1723 BIT_OFFSET is used internally for handling records and specifies the offset
1724 in bits modulo 256 to avoid overflow cases.
1726 See the x86-64 PS ABI for details.
1730 classify_argument (mode
, type
, classes
, bit_offset
)
1731 enum machine_mode mode
;
1733 enum x86_64_reg_class classes
[MAX_CLASSES
];
1737 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1738 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1740 /* Variable sized entities are always passed/returned in memory. */
1744 if (type
&& AGGREGATE_TYPE_P (type
))
1748 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
1750 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1754 for (i
= 0; i
< words
; i
++)
1755 classes
[i
] = X86_64_NO_CLASS
;
1757 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1758 signal memory class, so handle it as a special case.  */
1761 classes
[0] = X86_64_NO_CLASS
;
1765 /* Classify each field of record and merge classes. */
1766 if (TREE_CODE (type
) == RECORD_TYPE
)
1768 /* For classes first merge in the field of the subclasses. */
1769 if (TYPE_BINFO (type
) != NULL
&& TYPE_BINFO_BASETYPES (type
) != NULL
)
1771 tree bases
= TYPE_BINFO_BASETYPES (type
);
1772 int n_bases
= TREE_VEC_LENGTH (bases
);
1775 for (i
= 0; i
< n_bases
; ++i
)
1777 tree binfo
= TREE_VEC_ELT (bases
, i
);
1779 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
1780 tree type
= BINFO_TYPE (binfo
);
1782 num
= classify_argument (TYPE_MODE (type
),
1784 (offset
+ bit_offset
) % 256);
1787 for (i
= 0; i
< num
; i
++)
1789 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
1791 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1795 /* And now merge the fields of structure. */
1796 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1798 if (TREE_CODE (field
) == FIELD_DECL
)
1802 /* Bitfields are always classified as integer. Handle them
1803 early, since later code would consider them to be
1804 misaligned integers. */
1805 if (DECL_BIT_FIELD (field
))
1807 for (i
= int_bit_position (field
) / 8 / 8;
1808 i
< (int_bit_position (field
)
1809 + tree_low_cst (DECL_SIZE (field
), 0)
1812 merge_classes (X86_64_INTEGER_CLASS
,
1817 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1818 TREE_TYPE (field
), subclasses
,
1819 (int_bit_position (field
)
1820 + bit_offset
) % 256);
1823 for (i
= 0; i
< num
; i
++)
1826 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
1828 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1834 /* Arrays are handled as small records. */
1835 else if (TREE_CODE (type
) == ARRAY_TYPE
)
1838 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
1839 TREE_TYPE (type
), subclasses
, bit_offset
);
1843 /* The partial classes are now full classes. */
1844 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
1845 subclasses
[0] = X86_64_SSE_CLASS
;
1846 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
1847 subclasses
[0] = X86_64_INTEGER_CLASS
;
1849 for (i
= 0; i
< words
; i
++)
1850 classes
[i
] = subclasses
[i
% num
];
1852 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1853 else if (TREE_CODE (type
) == UNION_TYPE
1854 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
1856 /* For classes first merge in the field of the subclasses. */
1857 if (TYPE_BINFO (type
) != NULL
&& TYPE_BINFO_BASETYPES (type
) != NULL
)
1859 tree bases
= TYPE_BINFO_BASETYPES (type
);
1860 int n_bases
= TREE_VEC_LENGTH (bases
);
1863 for (i
= 0; i
< n_bases
; ++i
)
1865 tree binfo
= TREE_VEC_ELT (bases
, i
);
1867 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
1868 tree type
= BINFO_TYPE (binfo
);
1870 num
= classify_argument (TYPE_MODE (type
),
1872 (offset
+ (bit_offset
% 64)) % 256);
1875 for (i
= 0; i
< num
; i
++)
1877 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
1879 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1883 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1885 if (TREE_CODE (field
) == FIELD_DECL
)
1888 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1889 TREE_TYPE (field
), subclasses
,
1893 for (i
= 0; i
< num
; i
++)
1894 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
1901 /* Final merger cleanup. */
1902 for (i
= 0; i
< words
; i
++)
1904 /* If one class is MEMORY, everything should be passed in
1906 if (classes
[i
] == X86_64_MEMORY_CLASS
)
1909 /* The X86_64_SSEUP_CLASS should be always preceded by
1910 X86_64_SSE_CLASS. */
1911 if (classes
[i
] == X86_64_SSEUP_CLASS
1912 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
1913 classes
[i
] = X86_64_SSE_CLASS
;
1915 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1916 if (classes
[i
] == X86_64_X87UP_CLASS
1917 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
1918 classes
[i
] = X86_64_SSE_CLASS
;
1923 /* Compute alignment needed. We align all types to natural boundaries with
1924 exception of XFmode that is aligned to 64bits. */
1925 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
1927 int mode_alignment
= GET_MODE_BITSIZE (mode
);
1930 mode_alignment
= 128;
1931 else if (mode
== XCmode
)
1932 mode_alignment
= 256;
1933 /* Misaligned fields are always returned in memory. */
1934 if (bit_offset
% mode_alignment
)
1938 /* Classification of atomic types. */
1948 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
1949 classes
[0] = X86_64_INTEGERSI_CLASS
;
1951 classes
[0] = X86_64_INTEGER_CLASS
;
1955 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1958 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1959 classes
[2] = classes
[3] = X86_64_INTEGER_CLASS
;
1962 if (!(bit_offset
% 64))
1963 classes
[0] = X86_64_SSESF_CLASS
;
1965 classes
[0] = X86_64_SSE_CLASS
;
1968 classes
[0] = X86_64_SSEDF_CLASS
;
1971 classes
[0] = X86_64_X87_CLASS
;
1972 classes
[1] = X86_64_X87UP_CLASS
;
1975 classes
[0] = X86_64_X87_CLASS
;
1976 classes
[1] = X86_64_X87UP_CLASS
;
1977 classes
[2] = X86_64_X87_CLASS
;
1978 classes
[3] = X86_64_X87UP_CLASS
;
1981 classes
[0] = X86_64_SSEDF_CLASS
;
1982 classes
[1] = X86_64_SSEDF_CLASS
;
1985 classes
[0] = X86_64_SSE_CLASS
;
1993 classes
[0] = X86_64_SSE_CLASS
;
1994 classes
[1] = X86_64_SSEUP_CLASS
;
2009 /* Examine the argument and return set number of register required in each
2010 class. Return 0 iff parameter should be passed in memory. */
2012 examine_argument (mode
, type
, in_return
, int_nregs
, sse_nregs
)
2013 enum machine_mode mode
;
2015 int *int_nregs
, *sse_nregs
;
2018 enum x86_64_reg_class
class[MAX_CLASSES
];
2019 int n
= classify_argument (mode
, type
, class, 0);
2025 for (n
--; n
>= 0; n
--)
2028 case X86_64_INTEGER_CLASS
:
2029 case X86_64_INTEGERSI_CLASS
:
2032 case X86_64_SSE_CLASS
:
2033 case X86_64_SSESF_CLASS
:
2034 case X86_64_SSEDF_CLASS
:
2037 case X86_64_NO_CLASS
:
2038 case X86_64_SSEUP_CLASS
:
2040 case X86_64_X87_CLASS
:
2041 case X86_64_X87UP_CLASS
:
2045 case X86_64_MEMORY_CLASS
:
2050 /* Construct container for the argument used by GCC interface. See
2051 FUNCTION_ARG for the detailed description. */
2053 construct_container (mode
, type
, in_return
, nintregs
, nsseregs
, intreg
, sse_regno
)
2054 enum machine_mode mode
;
2057 int nintregs
, nsseregs
;
2061 enum machine_mode tmpmode
;
2063 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2064 enum x86_64_reg_class
class[MAX_CLASSES
];
2068 int needed_sseregs
, needed_intregs
;
2069 rtx exp
[MAX_CLASSES
];
2072 n
= classify_argument (mode
, type
, class, 0);
2073 if (TARGET_DEBUG_ARG
)
2076 fprintf (stderr
, "Memory class\n");
2079 fprintf (stderr
, "Classes:");
2080 for (i
= 0; i
< n
; i
++)
2082 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
2084 fprintf (stderr
, "\n");
2089 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
, &needed_sseregs
))
2091 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
2094 /* First construct simple cases. Avoid SCmode, since we want to use
2095 single register to pass this type. */
2096 if (n
== 1 && mode
!= SCmode
)
2099 case X86_64_INTEGER_CLASS
:
2100 case X86_64_INTEGERSI_CLASS
:
2101 return gen_rtx_REG (mode
, intreg
[0]);
2102 case X86_64_SSE_CLASS
:
2103 case X86_64_SSESF_CLASS
:
2104 case X86_64_SSEDF_CLASS
:
2105 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2106 case X86_64_X87_CLASS
:
2107 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
2108 case X86_64_NO_CLASS
:
2109 /* Zero sized array, struct or class. */
2114 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
)
2115 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2117 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
2118 return gen_rtx_REG (TFmode
, FIRST_STACK_REG
);
2119 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
2120 && class[1] == X86_64_INTEGER_CLASS
2121 && (mode
== CDImode
|| mode
== TImode
)
2122 && intreg
[0] + 1 == intreg
[1])
2123 return gen_rtx_REG (mode
, intreg
[0]);
2125 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
2126 && class[2] == X86_64_X87_CLASS
&& class[3] == X86_64_X87UP_CLASS
)
2127 return gen_rtx_REG (TCmode
, FIRST_STACK_REG
);
2129 /* Otherwise figure out the entries of the PARALLEL. */
2130 for (i
= 0; i
< n
; i
++)
2134 case X86_64_NO_CLASS
:
2136 case X86_64_INTEGER_CLASS
:
2137 case X86_64_INTEGERSI_CLASS
:
2138 /* Merge TImodes on aligned occassions here too. */
2139 if (i
* 8 + 8 > bytes
)
2140 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
2141 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
2145 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2146 if (tmpmode
== BLKmode
)
2148 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2149 gen_rtx_REG (tmpmode
, *intreg
),
2153 case X86_64_SSESF_CLASS
:
2154 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2155 gen_rtx_REG (SFmode
,
2156 SSE_REGNO (sse_regno
)),
2160 case X86_64_SSEDF_CLASS
:
2161 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2162 gen_rtx_REG (DFmode
,
2163 SSE_REGNO (sse_regno
)),
2167 case X86_64_SSE_CLASS
:
2168 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
2172 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2173 gen_rtx_REG (tmpmode
,
2174 SSE_REGNO (sse_regno
)),
2176 if (tmpmode
== TImode
)
2184 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2185 for (i
= 0; i
< nexps
; i
++)
2186 XVECEXP (ret
, 0, i
) = exp
[i
];
2190 /* Update the data in CUM to advance over an argument
2191 of mode MODE and data type TYPE.
2192 (TYPE is null for libcalls where that information may not be available.) */
2195 function_arg_advance (cum
, mode
, type
, named
)
2196 CUMULATIVE_ARGS
*cum
; /* current arg information */
2197 enum machine_mode mode
; /* current arg mode */
2198 tree type
; /* type of the argument or 0 if lib support */
2199 int named
; /* whether or not the argument was named */
2202 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2203 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2205 if (TARGET_DEBUG_ARG
)
2207 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2208 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2211 int int_nregs
, sse_nregs
;
2212 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
2213 cum
->words
+= words
;
2214 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2216 cum
->nregs
-= int_nregs
;
2217 cum
->sse_nregs
-= sse_nregs
;
2218 cum
->regno
+= int_nregs
;
2219 cum
->sse_regno
+= sse_nregs
;
2222 cum
->words
+= words
;
2226 if (TARGET_SSE
&& mode
== TImode
)
2228 cum
->sse_words
+= words
;
2229 cum
->sse_nregs
-= 1;
2230 cum
->sse_regno
+= 1;
2231 if (cum
->sse_nregs
<= 0)
2239 cum
->words
+= words
;
2240 cum
->nregs
-= words
;
2241 cum
->regno
+= words
;
2243 if (cum
->nregs
<= 0)
2253 /* Define where to put the arguments to a function.
2254 Value is zero to push the argument on the stack,
2255 or a hard register in which to store the argument.
2257 MODE is the argument's machine mode.
2258 TYPE is the data type of the argument (as a tree).
2259 This is null for libcalls where that information may
2261 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2262 the preceding args and about the function being called.
2263 NAMED is nonzero if this argument is a named parameter
2264 (otherwise it is an extra parameter matching an ellipsis). */
2267 function_arg (cum
, mode
, type
, named
)
2268 CUMULATIVE_ARGS
*cum
; /* current arg information */
2269 enum machine_mode mode
; /* current arg mode */
2270 tree type
; /* type of the argument or 0 if lib support */
2271 int named
; /* != 0 for normal args, == 0 for ... args */
2275 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2276 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2278 /* Handle an hidden AL argument containing number of registers for varargs
2279 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2281 if (mode
== VOIDmode
)
2284 return GEN_INT (cum
->maybe_vaarg
2285 ? (cum
->sse_nregs
< 0
2293 ret
= construct_container (mode
, type
, 0, cum
->nregs
, cum
->sse_nregs
,
2294 &x86_64_int_parameter_registers
[cum
->regno
],
2299 /* For now, pass fp/complex values on the stack. */
2311 if (words
<= cum
->nregs
)
2313 int regno
= cum
->regno
;
2315 /* Fastcall allocates the first two DWORD (SImode) or
2316 smaller arguments to ECX and EDX. */
2319 if (mode
== BLKmode
|| mode
== DImode
)
2322 /* ECX not EAX is the first allocated register. */
2326 ret
= gen_rtx_REG (mode
, regno
);
2331 ret
= gen_rtx_REG (mode
, cum
->sse_regno
);
2335 if (TARGET_DEBUG_ARG
)
2338 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2339 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2342 print_simple_rtl (stderr
, ret
);
2344 fprintf (stderr
, ", stack");
2346 fprintf (stderr
, " )\n");
2352 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2355 contains_128bit_aligned_vector_p (type
)
2358 enum machine_mode mode
= TYPE_MODE (type
);
2359 if (SSE_REG_MODE_P (mode
)
2360 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
2362 if (TYPE_ALIGN (type
) < 128)
2365 if (AGGREGATE_TYPE_P (type
))
2367 /* Walk the agregates recursivly. */
2368 if (TREE_CODE (type
) == RECORD_TYPE
2369 || TREE_CODE (type
) == UNION_TYPE
2370 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
2374 if (TYPE_BINFO (type
) != NULL
2375 && TYPE_BINFO_BASETYPES (type
) != NULL
)
2377 tree bases
= TYPE_BINFO_BASETYPES (type
);
2378 int n_bases
= TREE_VEC_LENGTH (bases
);
2381 for (i
= 0; i
< n_bases
; ++i
)
2383 tree binfo
= TREE_VEC_ELT (bases
, i
);
2384 tree type
= BINFO_TYPE (binfo
);
2386 if (contains_128bit_aligned_vector_p (type
))
2390 /* And now merge the fields of structure. */
2391 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2393 if (TREE_CODE (field
) == FIELD_DECL
2394 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
2398 /* Just for use if some languages passes arrays by value. */
2399 else if (TREE_CODE (type
) == ARRAY_TYPE
)
2401 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
2410 /* A C expression that indicates when an argument must be passed by
2411 reference. If nonzero for an argument, a copy of that argument is
2412 made in memory and a pointer to the argument is passed instead of
2413 the argument itself. The pointer is passed in whatever way is
2414 appropriate for passing a pointer to that type. */
2417 function_arg_pass_by_reference (cum
, mode
, type
, named
)
2418 CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
;
2419 enum machine_mode mode ATTRIBUTE_UNUSED
;
2421 int named ATTRIBUTE_UNUSED
;
2426 if (type
&& int_size_in_bytes (type
) == -1)
2428 if (TARGET_DEBUG_ARG
)
2429 fprintf (stderr
, "function_arg_pass_by_reference\n");
2436 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2440 ix86_function_arg_boundary (mode
, type
)
2441 enum machine_mode mode
;
2446 align
= TYPE_ALIGN (type
);
2448 align
= GET_MODE_ALIGNMENT (mode
);
2449 if (align
< PARM_BOUNDARY
)
2450 align
= PARM_BOUNDARY
;
2453 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2454 make an exception for SSE modes since these require 128bit
2457 The handling here differs from field_alignment. ICC aligns MMX
2458 arguments to 4 byte boundaries, while structure fields are aligned
2459 to 8 byte boundaries. */
2462 if (!SSE_REG_MODE_P (mode
))
2463 align
= PARM_BOUNDARY
;
2467 if (!contains_128bit_aligned_vector_p (type
))
2468 align
= PARM_BOUNDARY
;
2470 if (align
!= PARM_BOUNDARY
&& !TARGET_SSE
)
2478 /* Return true if N is a possible register number of function value. */
2480 ix86_function_value_regno_p (regno
)
2485 return ((regno
) == 0
2486 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
2487 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
2489 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
2490 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
2491 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
2494 /* Define how to find the value returned by a function.
2495 VALTYPE is the data type of the value (as a tree).
2496 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2497 otherwise, FUNC is 0. */
2499 ix86_function_value (valtype
)
2504 rtx ret
= construct_container (TYPE_MODE (valtype
), valtype
, 1,
2505 REGPARM_MAX
, SSE_REGPARM_MAX
,
2506 x86_64_int_return_registers
, 0);
2507 /* For zero sized structures, construct_continer return NULL, but we need
2508 to keep rest of compiler happy by returning meaningfull value. */
2510 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
2514 return gen_rtx_REG (TYPE_MODE (valtype
),
2515 ix86_value_regno (TYPE_MODE (valtype
)));
2518 /* Return false iff type is returned in memory. */
2520 ix86_return_in_memory (type
)
2523 int needed_intregs
, needed_sseregs
, size
;
2524 enum machine_mode mode
= TYPE_MODE (type
);
2527 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
2529 if (mode
== BLKmode
)
2532 size
= int_size_in_bytes (type
);
2534 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
2537 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
2539 /* User-created vectors small enough to fit in EAX. */
2543 /* MMX/3dNow values are returned on the stack, since we've
2544 got to EMMS/FEMMS before returning. */
2548 /* SSE values are returned in XMM0. */
2549 /* ??? Except when it doesn't exist? We have a choice of
2550 either (1) being abi incompatible with a -march switch,
2551 or (2) generating an error here. Given no good solution,
2552 I think the safest thing is one warning. The user won't
2553 be able to use -Werror, but... */
2564 warning ("SSE vector return without SSE enabled "
2578 /* Define how to find the value returned by a library function
2579 assuming the value has mode MODE. */
2581 ix86_libcall_value (mode
)
2582 enum machine_mode mode
;
2592 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
2595 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
2597 return gen_rtx_REG (mode
, 0);
2601 return gen_rtx_REG (mode
, ix86_value_regno (mode
));
2604 /* Given a mode, return the register to use for a return value. */
2607 ix86_value_regno (mode
)
2608 enum machine_mode mode
;
2610 /* Floating point return values in %st(0). */
2611 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
&& TARGET_FLOAT_RETURNS_IN_80387
)
2612 return FIRST_FLOAT_REG
;
2613 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2614 we prevent this case when sse is not available. */
2615 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
2616 return FIRST_SSE_REG
;
2617 /* Everything else in %eax. */
2621 /* Create the va_list data type. */
2624 ix86_build_va_list ()
2626 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
2628 /* For i386 we use plain pointer to argument area. */
2630 return build_pointer_type (char_type_node
);
2632 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
2633 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
2635 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
2636 unsigned_type_node
);
2637 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
2638 unsigned_type_node
);
2639 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
2641 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
2644 DECL_FIELD_CONTEXT (f_gpr
) = record
;
2645 DECL_FIELD_CONTEXT (f_fpr
) = record
;
2646 DECL_FIELD_CONTEXT (f_ovf
) = record
;
2647 DECL_FIELD_CONTEXT (f_sav
) = record
;
2649 TREE_CHAIN (record
) = type_decl
;
2650 TYPE_NAME (record
) = type_decl
;
2651 TYPE_FIELDS (record
) = f_gpr
;
2652 TREE_CHAIN (f_gpr
) = f_fpr
;
2653 TREE_CHAIN (f_fpr
) = f_ovf
;
2654 TREE_CHAIN (f_ovf
) = f_sav
;
2656 layout_type (record
);
2658 /* The correct type is an array type of one element. */
2659 return build_array_type (record
, build_index_type (size_zero_node
));
2662 /* Perform any needed actions needed for a function that is receiving a
2663 variable number of arguments.
2667 MODE and TYPE are the mode and type of the current parameter.
2669 PRETEND_SIZE is a variable that should be set to the amount of stack
2670 that must be pushed by the prolog to pretend that our caller pushed
2673 Normally, this macro will push all remaining incoming registers on the
2674 stack and set PRETEND_SIZE to the length of the registers pushed. */
2677 ix86_setup_incoming_varargs (cum
, mode
, type
, pretend_size
, no_rtl
)
2678 CUMULATIVE_ARGS
*cum
;
2679 enum machine_mode mode
;
2681 int *pretend_size ATTRIBUTE_UNUSED
;
2685 CUMULATIVE_ARGS next_cum
;
2686 rtx save_area
= NULL_RTX
, mem
;
2699 /* Indicate to allocate space on the stack for varargs save area. */
2700 ix86_save_varrargs_registers
= 1;
2702 cfun
->stack_alignment_needed
= 128;
2704 fntype
= TREE_TYPE (current_function_decl
);
2705 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
2706 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
2707 != void_type_node
));
2709 /* For varargs, we do not want to skip the dummy va_dcl argument.
2710 For stdargs, we do want to skip the last named argument. */
2713 function_arg_advance (&next_cum
, mode
, type
, 1);
2716 save_area
= frame_pointer_rtx
;
2718 set
= get_varargs_alias_set ();
2720 for (i
= next_cum
.regno
; i
< ix86_regparm
; i
++)
2722 mem
= gen_rtx_MEM (Pmode
,
2723 plus_constant (save_area
, i
* UNITS_PER_WORD
));
2724 set_mem_alias_set (mem
, set
);
2725 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
2726 x86_64_int_parameter_registers
[i
]));
2729 if (next_cum
.sse_nregs
)
2731 /* Now emit code to save SSE registers. The AX parameter contains number
2732 of SSE parameter regsiters used to call this function. We use
2733 sse_prologue_save insn template that produces computed jump across
2734 SSE saves. We need some preparation work to get this working. */
2736 label
= gen_label_rtx ();
2737 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
2739 /* Compute address to jump to :
2740 label - 5*eax + nnamed_sse_arguments*5 */
2741 tmp_reg
= gen_reg_rtx (Pmode
);
2742 nsse_reg
= gen_reg_rtx (Pmode
);
2743 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
2744 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2745 gen_rtx_MULT (Pmode
, nsse_reg
,
2747 if (next_cum
.sse_regno
)
2750 gen_rtx_CONST (DImode
,
2751 gen_rtx_PLUS (DImode
,
2753 GEN_INT (next_cum
.sse_regno
* 4))));
2755 emit_move_insn (nsse_reg
, label_ref
);
2756 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
2758 /* Compute address of memory block we save into. We always use pointer
2759 pointing 127 bytes after first byte to store - this is needed to keep
2760 instruction size limited by 4 bytes. */
2761 tmp_reg
= gen_reg_rtx (Pmode
);
2762 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2763 plus_constant (save_area
,
2764 8 * REGPARM_MAX
+ 127)));
2765 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
2766 set_mem_alias_set (mem
, set
);
2767 set_mem_align (mem
, BITS_PER_WORD
);
2769 /* And finally do the dirty job! */
2770 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
2771 GEN_INT (next_cum
.sse_regno
), label
));
2776 /* Implement va_start. */
2779 ix86_va_start (valist
, nextarg
)
2783 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
2784 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2785 tree gpr
, fpr
, ovf
, sav
, t
;
2787 /* Only 64bit target needs something special. */
2790 std_expand_builtin_va_start (valist
, nextarg
);
2794 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2795 f_fpr
= TREE_CHAIN (f_gpr
);
2796 f_ovf
= TREE_CHAIN (f_fpr
);
2797 f_sav
= TREE_CHAIN (f_ovf
);
2799 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2800 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2801 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2802 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2803 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2805 /* Count number of gp and fp argument registers used. */
2806 words
= current_function_args_info
.words
;
2807 n_gpr
= current_function_args_info
.regno
;
2808 n_fpr
= current_function_args_info
.sse_regno
;
2810 if (TARGET_DEBUG_ARG
)
2811 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2812 (int) words
, (int) n_gpr
, (int) n_fpr
);
2814 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
2815 build_int_2 (n_gpr
* 8, 0));
2816 TREE_SIDE_EFFECTS (t
) = 1;
2817 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2819 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
2820 build_int_2 (n_fpr
* 16 + 8*REGPARM_MAX
, 0));
2821 TREE_SIDE_EFFECTS (t
) = 1;
2822 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2824 /* Find the overflow area. */
2825 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
2827 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
2828 build_int_2 (words
* UNITS_PER_WORD
, 0));
2829 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2830 TREE_SIDE_EFFECTS (t
) = 1;
2831 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2833 /* Find the register save area.
2834 Prologue of the function save it right above stack frame. */
2835 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
2836 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
2837 TREE_SIDE_EFFECTS (t
) = 1;
2838 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2841 /* Implement va_arg. */
2843 ix86_va_arg (valist
, type
)
2846 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
2847 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2848 tree gpr
, fpr
, ovf
, sav
, t
;
2850 rtx lab_false
, lab_over
= NULL_RTX
;
2855 /* Only 64bit target needs something special. */
2858 return std_expand_builtin_va_arg (valist
, type
);
2861 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2862 f_fpr
= TREE_CHAIN (f_gpr
);
2863 f_ovf
= TREE_CHAIN (f_fpr
);
2864 f_sav
= TREE_CHAIN (f_ovf
);
2866 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2867 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2868 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2869 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2870 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2872 size
= int_size_in_bytes (type
);
2875 /* Passed by reference. */
2877 type
= build_pointer_type (type
);
2878 size
= int_size_in_bytes (type
);
2880 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2882 container
= construct_container (TYPE_MODE (type
), type
, 0,
2883 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
2885 * Pull the value out of the saved registers ...
2888 addr_rtx
= gen_reg_rtx (Pmode
);
2892 rtx int_addr_rtx
, sse_addr_rtx
;
2893 int needed_intregs
, needed_sseregs
;
2896 lab_over
= gen_label_rtx ();
2897 lab_false
= gen_label_rtx ();
2899 examine_argument (TYPE_MODE (type
), type
, 0,
2900 &needed_intregs
, &needed_sseregs
);
2903 need_temp
= ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
2904 || TYPE_ALIGN (type
) > 128);
2906 /* In case we are passing structure, verify that it is consetuctive block
2907 on the register save area. If not we need to do moves. */
2908 if (!need_temp
&& !REG_P (container
))
2910 /* Verify that all registers are strictly consetuctive */
2911 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
2915 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2917 rtx slot
= XVECEXP (container
, 0, i
);
2918 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
2919 || INTVAL (XEXP (slot
, 1)) != i
* 16)
2927 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2929 rtx slot
= XVECEXP (container
, 0, i
);
2930 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
2931 || INTVAL (XEXP (slot
, 1)) != i
* 8)
2938 int_addr_rtx
= addr_rtx
;
2939 sse_addr_rtx
= addr_rtx
;
2943 int_addr_rtx
= gen_reg_rtx (Pmode
);
2944 sse_addr_rtx
= gen_reg_rtx (Pmode
);
2946 /* First ensure that we fit completely in registers. */
2949 emit_cmp_and_jump_insns (expand_expr
2950 (gpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2951 GEN_INT ((REGPARM_MAX
- needed_intregs
+
2952 1) * 8), GE
, const1_rtx
, SImode
,
2957 emit_cmp_and_jump_insns (expand_expr
2958 (fpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2959 GEN_INT ((SSE_REGPARM_MAX
-
2960 needed_sseregs
+ 1) * 16 +
2961 REGPARM_MAX
* 8), GE
, const1_rtx
,
2962 SImode
, 1, lab_false
);
2965 /* Compute index to start of area used for integer regs. */
2968 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, gpr
);
2969 r
= expand_expr (t
, int_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2970 if (r
!= int_addr_rtx
)
2971 emit_move_insn (int_addr_rtx
, r
);
2975 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, fpr
);
2976 r
= expand_expr (t
, sse_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2977 if (r
!= sse_addr_rtx
)
2978 emit_move_insn (sse_addr_rtx
, r
);
2986 /* Never use the memory itself, as it has the alias set. */
2987 x
= XEXP (assign_temp (type
, 0, 1, 0), 0);
2988 mem
= gen_rtx_MEM (BLKmode
, x
);
2989 force_operand (x
, addr_rtx
);
2990 set_mem_alias_set (mem
, get_varargs_alias_set ());
2991 set_mem_align (mem
, BITS_PER_UNIT
);
2993 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
2995 rtx slot
= XVECEXP (container
, 0, i
);
2996 rtx reg
= XEXP (slot
, 0);
2997 enum machine_mode mode
= GET_MODE (reg
);
3003 if (SSE_REGNO_P (REGNO (reg
)))
3005 src_addr
= sse_addr_rtx
;
3006 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
3010 src_addr
= int_addr_rtx
;
3011 src_offset
= REGNO (reg
) * 8;
3013 src_mem
= gen_rtx_MEM (mode
, src_addr
);
3014 set_mem_alias_set (src_mem
, get_varargs_alias_set ());
3015 src_mem
= adjust_address (src_mem
, mode
, src_offset
);
3016 dest_mem
= adjust_address (mem
, mode
, INTVAL (XEXP (slot
, 1)));
3017 emit_move_insn (dest_mem
, src_mem
);
3024 build (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
3025 build_int_2 (needed_intregs
* 8, 0));
3026 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
3027 TREE_SIDE_EFFECTS (t
) = 1;
3028 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3033 build (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
3034 build_int_2 (needed_sseregs
* 16, 0));
3035 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
3036 TREE_SIDE_EFFECTS (t
) = 1;
3037 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3040 emit_jump_insn (gen_jump (lab_over
));
3042 emit_label (lab_false
);
3045 /* ... otherwise out of the overflow area. */
3047 /* Care for on-stack alignment if needed. */
3048 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
3052 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
3053 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
, build_int_2 (align
- 1, 0));
3054 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
, build_int_2 (-align
, -1));
3058 r
= expand_expr (t
, addr_rtx
, Pmode
, EXPAND_NORMAL
);
3060 emit_move_insn (addr_rtx
, r
);
3063 build (PLUS_EXPR
, TREE_TYPE (t
), t
,
3064 build_int_2 (rsize
* UNITS_PER_WORD
, 0));
3065 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
3066 TREE_SIDE_EFFECTS (t
) = 1;
3067 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3070 emit_label (lab_over
);
3074 r
= gen_rtx_MEM (Pmode
, addr_rtx
);
3075 set_mem_alias_set (r
, get_varargs_alias_set ());
3076 emit_move_insn (addr_rtx
, r
);
3082 /* Return nonzero if OP is either a i387 or SSE fp register. */
3084 any_fp_register_operand (op
, mode
)
3086 enum machine_mode mode ATTRIBUTE_UNUSED
;
3088 return ANY_FP_REG_P (op
);
3091 /* Return nonzero if OP is an i387 fp register. */
3093 fp_register_operand (op
, mode
)
3095 enum machine_mode mode ATTRIBUTE_UNUSED
;
3097 return FP_REG_P (op
);
3100 /* Return nonzero if OP is a non-fp register_operand. */
3102 register_and_not_any_fp_reg_operand (op
, mode
)
3104 enum machine_mode mode
;
3106 return register_operand (op
, mode
) && !ANY_FP_REG_P (op
);
3109 /* Return nonzero of OP is a register operand other than an
3110 i387 fp register. */
3112 register_and_not_fp_reg_operand (op
, mode
)
3114 enum machine_mode mode
;
3116 return register_operand (op
, mode
) && !FP_REG_P (op
);
3119 /* Return nonzero if OP is general operand representable on x86_64. */
3122 x86_64_general_operand (op
, mode
)
3124 enum machine_mode mode
;
3127 return general_operand (op
, mode
);
3128 if (nonimmediate_operand (op
, mode
))
3130 return x86_64_sign_extended_value (op
);
3133 /* Return nonzero if OP is general operand representable on x86_64
3134 as either sign extended or zero extended constant. */
3137 x86_64_szext_general_operand (op
, mode
)
3139 enum machine_mode mode
;
3142 return general_operand (op
, mode
);
3143 if (nonimmediate_operand (op
, mode
))
3145 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
3148 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3151 x86_64_nonmemory_operand (op
, mode
)
3153 enum machine_mode mode
;
3156 return nonmemory_operand (op
, mode
);
3157 if (register_operand (op
, mode
))
3159 return x86_64_sign_extended_value (op
);
3162 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3165 x86_64_movabs_operand (op
, mode
)
3167 enum machine_mode mode
;
3169 if (!TARGET_64BIT
|| !flag_pic
)
3170 return nonmemory_operand (op
, mode
);
3171 if (register_operand (op
, mode
) || x86_64_sign_extended_value (op
))
3173 if (CONSTANT_P (op
) && !symbolic_reference_mentioned_p (op
))
3178 /* Return nonzero if OPNUM's MEM should be matched
3179 in movabs* patterns. */
3182 ix86_check_movabs (insn
, opnum
)
3188 set
= PATTERN (insn
);
3189 if (GET_CODE (set
) == PARALLEL
)
3190 set
= XVECEXP (set
, 0, 0);
3191 if (GET_CODE (set
) != SET
)
3193 mem
= XEXP (set
, opnum
);
3194 while (GET_CODE (mem
) == SUBREG
)
3195 mem
= SUBREG_REG (mem
);
3196 if (GET_CODE (mem
) != MEM
)
3198 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
3201 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3204 x86_64_szext_nonmemory_operand (op
, mode
)
3206 enum machine_mode mode
;
3209 return nonmemory_operand (op
, mode
);
3210 if (register_operand (op
, mode
))
3212 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
3215 /* Return nonzero if OP is immediate operand representable on x86_64. */
3218 x86_64_immediate_operand (op
, mode
)
3220 enum machine_mode mode
;
3223 return immediate_operand (op
, mode
);
3224 return x86_64_sign_extended_value (op
);
3227 /* Return nonzero if OP is immediate operand representable on x86_64. */
3230 x86_64_zext_immediate_operand (op
, mode
)
3232 enum machine_mode mode ATTRIBUTE_UNUSED
;
3234 return x86_64_zero_extended_value (op
);
3237 /* Return nonzero if OP is (const_int 1), else return zero. */
3240 const_int_1_operand (op
, mode
)
3242 enum machine_mode mode ATTRIBUTE_UNUSED
;
3244 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) == 1);
3247 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3248 for shift & compare patterns, as shifting by 0 does not change flags),
3249 else return zero. */
3252 const_int_1_31_operand (op
, mode
)
3254 enum machine_mode mode ATTRIBUTE_UNUSED
;
3256 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 1 && INTVAL (op
) <= 31);
3259 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3260 reference and a constant. */
3263 symbolic_operand (op
, mode
)
3265 enum machine_mode mode ATTRIBUTE_UNUSED
;
3267 switch (GET_CODE (op
))
3275 if (GET_CODE (op
) == SYMBOL_REF
3276 || GET_CODE (op
) == LABEL_REF
3277 || (GET_CODE (op
) == UNSPEC
3278 && (XINT (op
, 1) == UNSPEC_GOT
3279 || XINT (op
, 1) == UNSPEC_GOTOFF
3280 || XINT (op
, 1) == UNSPEC_GOTPCREL
)))
3282 if (GET_CODE (op
) != PLUS
3283 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
3287 if (GET_CODE (op
) == SYMBOL_REF
3288 || GET_CODE (op
) == LABEL_REF
)
3290 /* Only @GOTOFF gets offsets. */
3291 if (GET_CODE (op
) != UNSPEC
3292 || XINT (op
, 1) != UNSPEC_GOTOFF
)
3295 op
= XVECEXP (op
, 0, 0);
3296 if (GET_CODE (op
) == SYMBOL_REF
3297 || GET_CODE (op
) == LABEL_REF
)
3306 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3309 pic_symbolic_operand (op
, mode
)
3311 enum machine_mode mode ATTRIBUTE_UNUSED
;
3313 if (GET_CODE (op
) != CONST
)
3318 if (GET_CODE (XEXP (op
, 0)) == UNSPEC
)
3323 if (GET_CODE (op
) == UNSPEC
)
3325 if (GET_CODE (op
) != PLUS
3326 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
3329 if (GET_CODE (op
) == UNSPEC
)
3335 /* Return true if OP is a symbolic operand that resolves locally. */
3338 local_symbolic_operand (op
, mode
)
3340 enum machine_mode mode ATTRIBUTE_UNUSED
;
3342 if (GET_CODE (op
) == CONST
3343 && GET_CODE (XEXP (op
, 0)) == PLUS
3344 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3345 op
= XEXP (XEXP (op
, 0), 0);
3347 if (GET_CODE (op
) == LABEL_REF
)
3350 if (GET_CODE (op
) != SYMBOL_REF
)
3353 /* These we've been told are local by varasm and encode_section_info
3355 if (CONSTANT_POOL_ADDRESS_P (op
) || SYMBOL_REF_FLAG (op
))
3358 /* There is, however, a not insubstantial body of code in the rest of
3359 the compiler that assumes it can just stick the results of
3360 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3361 /* ??? This is a hack. Should update the body of the compiler to
3362 always create a DECL an invoke targetm.encode_section_info. */
3363 if (strncmp (XSTR (op
, 0), internal_label_prefix
,
3364 internal_label_prefix_len
) == 0)
3370 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3373 tls_symbolic_operand (op
, mode
)
3375 enum machine_mode mode ATTRIBUTE_UNUSED
;
3377 const char *symbol_str
;
3379 if (GET_CODE (op
) != SYMBOL_REF
)
3381 symbol_str
= XSTR (op
, 0);
3383 if (symbol_str
[0] != '%')
3385 return strchr (tls_model_chars
, symbol_str
[1]) - tls_model_chars
;
3389 tls_symbolic_operand_1 (op
, kind
)
3391 enum tls_model kind
;
3393 const char *symbol_str
;
3395 if (GET_CODE (op
) != SYMBOL_REF
)
3397 symbol_str
= XSTR (op
, 0);
3399 return symbol_str
[0] == '%' && symbol_str
[1] == tls_model_chars
[kind
];
3403 global_dynamic_symbolic_operand (op
, mode
)
3405 enum machine_mode mode ATTRIBUTE_UNUSED
;
3407 return tls_symbolic_operand_1 (op
, TLS_MODEL_GLOBAL_DYNAMIC
);
3411 local_dynamic_symbolic_operand (op
, mode
)
3413 enum machine_mode mode ATTRIBUTE_UNUSED
;
3415 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_DYNAMIC
);
3419 initial_exec_symbolic_operand (op
, mode
)
3421 enum machine_mode mode ATTRIBUTE_UNUSED
;
3423 return tls_symbolic_operand_1 (op
, TLS_MODEL_INITIAL_EXEC
);
3427 local_exec_symbolic_operand (op
, mode
)
3429 enum machine_mode mode ATTRIBUTE_UNUSED
;
3431 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_EXEC
);
3434 /* Test for a valid operand for a call instruction. Don't allow the
3435 arg pointer register or virtual regs since they may decay into
3436 reg + const, which the patterns can't handle. */
3439 call_insn_operand (op
, mode
)
3441 enum machine_mode mode ATTRIBUTE_UNUSED
;
3443 /* Disallow indirect through a virtual register. This leads to
3444 compiler aborts when trying to eliminate them. */
3445 if (GET_CODE (op
) == REG
3446 && (op
== arg_pointer_rtx
3447 || op
== frame_pointer_rtx
3448 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
3449 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
3452 /* Disallow `call 1234'. Due to varying assembler lameness this
3453 gets either rejected or translated to `call .+1234'. */
3454 if (GET_CODE (op
) == CONST_INT
)
3457 /* Explicitly allow SYMBOL_REF even if pic. */
3458 if (GET_CODE (op
) == SYMBOL_REF
)
3461 /* Otherwise we can allow any general_operand in the address. */
3462 return general_operand (op
, Pmode
);
3466 constant_call_address_operand (op
, mode
)
3468 enum machine_mode mode ATTRIBUTE_UNUSED
;
3470 if (GET_CODE (op
) == CONST
3471 && GET_CODE (XEXP (op
, 0)) == PLUS
3472 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3473 op
= XEXP (XEXP (op
, 0), 0);
3474 return GET_CODE (op
) == SYMBOL_REF
;
3477 /* Match exactly zero and one. */
3480 const0_operand (op
, mode
)
3482 enum machine_mode mode
;
3484 return op
== CONST0_RTX (mode
);
3488 const1_operand (op
, mode
)
3490 enum machine_mode mode ATTRIBUTE_UNUSED
;
3492 return op
== const1_rtx
;
3495 /* Match 2, 4, or 8. Used for leal multiplicands. */
3498 const248_operand (op
, mode
)
3500 enum machine_mode mode ATTRIBUTE_UNUSED
;
3502 return (GET_CODE (op
) == CONST_INT
3503 && (INTVAL (op
) == 2 || INTVAL (op
) == 4 || INTVAL (op
) == 8));
3506 /* True if this is a constant appropriate for an increment or decremenmt. */
3509 incdec_operand (op
, mode
)
3511 enum machine_mode mode ATTRIBUTE_UNUSED
;
3513 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3514 registers, since carry flag is not set. */
3515 if (TARGET_PENTIUM4
&& !optimize_size
)
3517 return op
== const1_rtx
|| op
== constm1_rtx
;
3520 /* Return nonzero if OP is acceptable as operand of DImode shift
3524 shiftdi_operand (op
, mode
)
3526 enum machine_mode mode ATTRIBUTE_UNUSED
;
3529 return nonimmediate_operand (op
, mode
);
3531 return register_operand (op
, mode
);
3534 /* Return false if this is the stack pointer, or any other fake
3535 register eliminable to the stack pointer. Otherwise, this is
3538 This is used to prevent esp from being used as an index reg.
3539 Which would only happen in pathological cases. */
3542 reg_no_sp_operand (op
, mode
)
3544 enum machine_mode mode
;
3547 if (GET_CODE (t
) == SUBREG
)
3549 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
)
3552 return register_operand (op
, mode
);
3556 mmx_reg_operand (op
, mode
)
3558 enum machine_mode mode ATTRIBUTE_UNUSED
;
3560 return MMX_REG_P (op
);
3563 /* Return false if this is any eliminable register. Otherwise
3567 general_no_elim_operand (op
, mode
)
3569 enum machine_mode mode
;
3572 if (GET_CODE (t
) == SUBREG
)
3574 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3575 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3576 || t
== virtual_stack_dynamic_rtx
)
3579 && REGNO (t
) >= FIRST_VIRTUAL_REGISTER
3580 && REGNO (t
) <= LAST_VIRTUAL_REGISTER
)
3583 return general_operand (op
, mode
);
3586 /* Return false if this is any eliminable register. Otherwise
3587 register_operand or const_int. */
3590 nonmemory_no_elim_operand (op
, mode
)
3592 enum machine_mode mode
;
3595 if (GET_CODE (t
) == SUBREG
)
3597 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3598 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3599 || t
== virtual_stack_dynamic_rtx
)
3602 return GET_CODE (op
) == CONST_INT
|| register_operand (op
, mode
);
3605 /* Return false if this is any eliminable register or stack register,
3606 otherwise work like register_operand. */
3609 index_register_operand (op
, mode
)
3611 enum machine_mode mode
;
3614 if (GET_CODE (t
) == SUBREG
)
3618 if (t
== arg_pointer_rtx
3619 || t
== frame_pointer_rtx
3620 || t
== virtual_incoming_args_rtx
3621 || t
== virtual_stack_vars_rtx
3622 || t
== virtual_stack_dynamic_rtx
3623 || REGNO (t
) == STACK_POINTER_REGNUM
)
3626 return general_operand (op
, mode
);
3629 /* Return true if op is a Q_REGS class register. */
3632 q_regs_operand (op
, mode
)
3634 enum machine_mode mode
;
3636 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3638 if (GET_CODE (op
) == SUBREG
)
3639 op
= SUBREG_REG (op
);
3640 return ANY_QI_REG_P (op
);
3643 /* Return true if op is a NON_Q_REGS class register. */
3646 non_q_regs_operand (op
, mode
)
3648 enum machine_mode mode
;
3650 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3652 if (GET_CODE (op
) == SUBREG
)
3653 op
= SUBREG_REG (op
);
3654 return NON_QI_REG_P (op
);
3657 /* Return 1 when OP is operand acceptable for standard SSE move. */
3659 vector_move_operand (op
, mode
)
3661 enum machine_mode mode
;
3663 if (nonimmediate_operand (op
, mode
))
3665 if (GET_MODE (op
) != mode
&& mode
!= VOIDmode
)
3667 return (op
== CONST0_RTX (GET_MODE (op
)));
3670 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3673 sse_comparison_operator (op
, mode
)
3675 enum machine_mode mode ATTRIBUTE_UNUSED
;
3677 enum rtx_code code
= GET_CODE (op
);
3680 /* Operations supported directly. */
3690 /* These are equivalent to ones above in non-IEEE comparisons. */
3697 return !TARGET_IEEE_FP
;
3702 /* Return 1 if OP is a valid comparison operator in valid mode. */
3704 ix86_comparison_operator (op
, mode
)
3706 enum machine_mode mode
;
3708 enum machine_mode inmode
;
3709 enum rtx_code code
= GET_CODE (op
);
3710 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3712 if (GET_RTX_CLASS (code
) != '<')
3714 inmode
= GET_MODE (XEXP (op
, 0));
3716 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3718 enum rtx_code second_code
, bypass_code
;
3719 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3720 return (bypass_code
== NIL
&& second_code
== NIL
);
3727 if (inmode
== CCmode
|| inmode
== CCGCmode
3728 || inmode
== CCGOCmode
|| inmode
== CCNOmode
)
3731 case LTU
: case GTU
: case LEU
: case ORDERED
: case UNORDERED
: case GEU
:
3732 if (inmode
== CCmode
)
3736 if (inmode
== CCmode
|| inmode
== CCGCmode
|| inmode
== CCNOmode
)
3744 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3747 fcmov_comparison_operator (op
, mode
)
3749 enum machine_mode mode
;
3751 enum machine_mode inmode
;
3752 enum rtx_code code
= GET_CODE (op
);
3753 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3755 if (GET_RTX_CLASS (code
) != '<')
3757 inmode
= GET_MODE (XEXP (op
, 0));
3758 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3760 enum rtx_code second_code
, bypass_code
;
3761 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3762 if (bypass_code
!= NIL
|| second_code
!= NIL
)
3764 code
= ix86_fp_compare_code_to_integer (code
);
3766 /* i387 supports just limited amount of conditional codes. */
3769 case LTU
: case GTU
: case LEU
: case GEU
:
3770 if (inmode
== CCmode
|| inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3773 case ORDERED
: case UNORDERED
:
3781 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3784 promotable_binary_operator (op
, mode
)
3786 enum machine_mode mode ATTRIBUTE_UNUSED
;
3788 switch (GET_CODE (op
))
3791 /* Modern CPUs have same latency for HImode and SImode multiply,
3792 but 386 and 486 do HImode multiply faster. */
3793 return ix86_cpu
> PROCESSOR_I486
;
3805 /* Nearly general operand, but accept any const_double, since we wish
3806 to be able to drop them into memory rather than have them get pulled
3810 cmp_fp_expander_operand (op
, mode
)
3812 enum machine_mode mode
;
3814 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3816 if (GET_CODE (op
) == CONST_DOUBLE
)
3818 return general_operand (op
, mode
);
3821 /* Match an SI or HImode register for a zero_extract. */
3824 ext_register_operand (op
, mode
)
3826 enum machine_mode mode ATTRIBUTE_UNUSED
;
3829 if ((!TARGET_64BIT
|| GET_MODE (op
) != DImode
)
3830 && GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
3833 if (!register_operand (op
, VOIDmode
))
3836 /* Be curefull to accept only registers having upper parts. */
3837 regno
= REG_P (op
) ? REGNO (op
) : REGNO (SUBREG_REG (op
));
3838 return (regno
> LAST_VIRTUAL_REGISTER
|| regno
< 4);
3841 /* Return 1 if this is a valid binary floating-point operation.
3842 OP is the expression matched, and MODE is its mode. */
3845 binary_fp_operator (op
, mode
)
3847 enum machine_mode mode
;
3849 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3852 switch (GET_CODE (op
))
3858 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
3866 mult_operator (op
, mode
)
3868 enum machine_mode mode ATTRIBUTE_UNUSED
;
3870 return GET_CODE (op
) == MULT
;
3874 div_operator (op
, mode
)
3876 enum machine_mode mode ATTRIBUTE_UNUSED
;
3878 return GET_CODE (op
) == DIV
;
3882 arith_or_logical_operator (op
, mode
)
3884 enum machine_mode mode
;
3886 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
3887 && (GET_RTX_CLASS (GET_CODE (op
)) == 'c'
3888 || GET_RTX_CLASS (GET_CODE (op
)) == '2'));
3891 /* Returns 1 if OP is memory operand with a displacement. */
3894 memory_displacement_operand (op
, mode
)
3896 enum machine_mode mode
;
3898 struct ix86_address parts
;
3900 if (! memory_operand (op
, mode
))
3903 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
3906 return parts
.disp
!= NULL_RTX
;
3909 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3910 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3912 ??? It seems likely that this will only work because cmpsi is an
3913 expander, and no actual insns use this. */
3916 cmpsi_operand (op
, mode
)
3918 enum machine_mode mode
;
3920 if (nonimmediate_operand (op
, mode
))
3923 if (GET_CODE (op
) == AND
3924 && GET_MODE (op
) == SImode
3925 && GET_CODE (XEXP (op
, 0)) == ZERO_EXTRACT
3926 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
3927 && GET_CODE (XEXP (XEXP (op
, 0), 2)) == CONST_INT
3928 && INTVAL (XEXP (XEXP (op
, 0), 1)) == 8
3929 && INTVAL (XEXP (XEXP (op
, 0), 2)) == 8
3930 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
3936 /* Returns 1 if OP is memory operand that can not be represented by the
3940 long_memory_operand (op
, mode
)
3942 enum machine_mode mode
;
3944 if (! memory_operand (op
, mode
))
3947 return memory_address_length (op
) != 0;
3950 /* Return nonzero if the rtx is known aligned. */
3953 aligned_operand (op
, mode
)
3955 enum machine_mode mode
;
3957 struct ix86_address parts
;
3959 if (!general_operand (op
, mode
))
3962 /* Registers and immediate operands are always "aligned". */
3963 if (GET_CODE (op
) != MEM
)
3966 /* Don't even try to do any aligned optimizations with volatiles. */
3967 if (MEM_VOLATILE_P (op
))
3972 /* Pushes and pops are only valid on the stack pointer. */
3973 if (GET_CODE (op
) == PRE_DEC
3974 || GET_CODE (op
) == POST_INC
)
3977 /* Decode the address. */
3978 if (! ix86_decompose_address (op
, &parts
))
3981 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
3982 parts
.base
= SUBREG_REG (parts
.base
);
3983 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
3984 parts
.index
= SUBREG_REG (parts
.index
);
3986 /* Look for some component that isn't known to be aligned. */
3990 && REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 32)
3995 if (REGNO_POINTER_ALIGN (REGNO (parts
.base
)) < 32)
4000 if (GET_CODE (parts
.disp
) != CONST_INT
4001 || (INTVAL (parts
.disp
) & 3) != 0)
4005 /* Didn't find one -- this must be an aligned address. */
4009 /* Return true if the constant is something that can be loaded with
4010 a special instruction. Only handle 0.0 and 1.0; others are less
4014 standard_80387_constant_p (x
)
4017 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
4019 /* Note that on the 80387, other constants, such as pi, that we should support
4020 too. On some machines, these are much slower to load as standard constant,
4021 than to load from doubles in memory. */
4022 if (x
== CONST0_RTX (GET_MODE (x
)))
4024 if (x
== CONST1_RTX (GET_MODE (x
)))
4029 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4032 standard_sse_constant_p (x
)
4035 if (x
== const0_rtx
)
4037 return (x
== CONST0_RTX (GET_MODE (x
)));
4040 /* Returns 1 if OP contains a symbol reference */
4043 symbolic_reference_mentioned_p (op
)
4046 register const char *fmt
;
4049 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
4052 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
4053 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
4059 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
4060 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
4064 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
4071 /* Return 1 if it is appropriate to emit `ret' instructions in the
4072 body of a function. Do this only if the epilogue is simple, needing a
4073 couple of insns. Prior to reloading, we can't tell how many registers
4074 must be saved, so return 0 then. Return 0 if there is no frame
4075 marker to de-allocate.
4077 If NON_SAVING_SETJMP is defined and true, then it is not possible
4078 for the epilogue to be simple, so return 0. This is a special case
4079 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4080 until final, but jump_optimize may need to know sooner if a
4084 ix86_can_use_return_insn_p ()
4086 struct ix86_frame frame
;
4088 #ifdef NON_SAVING_SETJMP
4089 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
4093 if (! reload_completed
|| frame_pointer_needed
)
4096 /* Don't allow more than 32 pop, since that's all we can do
4097 with one instruction. */
4098 if (current_function_pops_args
4099 && current_function_args_size
>= 32768)
4102 ix86_compute_frame_layout (&frame
);
4103 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
4106 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4108 x86_64_sign_extended_value (value
)
4111 switch (GET_CODE (value
))
4113 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4114 to be at least 32 and this all acceptable constants are
4115 represented as CONST_INT. */
4117 if (HOST_BITS_PER_WIDE_INT
== 32)
4121 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (value
), DImode
);
4122 return trunc_int_for_mode (val
, SImode
) == val
;
4126 /* For certain code models, the symbolic references are known to fit.
4127 in CM_SMALL_PIC model we know it fits if it is local to the shared
4128 library. Don't count TLS SYMBOL_REFs here, since they should fit
4129 only if inside of UNSPEC handled below. */
4131 return (ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_KERNEL
);
4133 /* For certain code models, the code is near as well. */
4135 return (ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
4136 || ix86_cmodel
== CM_KERNEL
);
4138 /* We also may accept the offsetted memory references in certain special
4141 if (GET_CODE (XEXP (value
, 0)) == UNSPEC
)
4142 switch (XINT (XEXP (value
, 0), 1))
4144 case UNSPEC_GOTPCREL
:
4146 case UNSPEC_GOTNTPOFF
:
4152 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
4154 rtx op1
= XEXP (XEXP (value
, 0), 0);
4155 rtx op2
= XEXP (XEXP (value
, 0), 1);
4156 HOST_WIDE_INT offset
;
4158 if (ix86_cmodel
== CM_LARGE
)
4160 if (GET_CODE (op2
) != CONST_INT
)
4162 offset
= trunc_int_for_mode (INTVAL (op2
), DImode
);
4163 switch (GET_CODE (op1
))
4166 /* For CM_SMALL assume that latest object is 16MB before
4167 end of 31bits boundary. We may also accept pretty
4168 large negative constants knowing that all objects are
4169 in the positive half of address space. */
4170 if (ix86_cmodel
== CM_SMALL
4171 && offset
< 16*1024*1024
4172 && trunc_int_for_mode (offset
, SImode
) == offset
)
4174 /* For CM_KERNEL we know that all object resist in the
4175 negative half of 32bits address space. We may not
4176 accept negative offsets, since they may be just off
4177 and we may accept pretty large positive ones. */
4178 if (ix86_cmodel
== CM_KERNEL
4180 && trunc_int_for_mode (offset
, SImode
) == offset
)
4184 /* These conditions are similar to SYMBOL_REF ones, just the
4185 constraints for code models differ. */
4186 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
4187 && offset
< 16*1024*1024
4188 && trunc_int_for_mode (offset
, SImode
) == offset
)
4190 if (ix86_cmodel
== CM_KERNEL
4192 && trunc_int_for_mode (offset
, SImode
) == offset
)
4196 switch (XINT (op1
, 1))
4201 && trunc_int_for_mode (offset
, SImode
) == offset
)
4215 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4217 x86_64_zero_extended_value (value
)
4220 switch (GET_CODE (value
))
4223 if (HOST_BITS_PER_WIDE_INT
== 32)
4224 return (GET_MODE (value
) == VOIDmode
4225 && !CONST_DOUBLE_HIGH (value
));
4229 if (HOST_BITS_PER_WIDE_INT
== 32)
4230 return INTVAL (value
) >= 0;
4232 return !(INTVAL (value
) & ~(HOST_WIDE_INT
) 0xffffffff);
4235 /* For certain code models, the symbolic references are known to fit. */
4237 return ix86_cmodel
== CM_SMALL
;
4239 /* For certain code models, the code is near as well. */
4241 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
;
4243 /* We also may accept the offsetted memory references in certain special
4246 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
4248 rtx op1
= XEXP (XEXP (value
, 0), 0);
4249 rtx op2
= XEXP (XEXP (value
, 0), 1);
4251 if (ix86_cmodel
== CM_LARGE
)
4253 switch (GET_CODE (op1
))
4257 /* For small code model we may accept pretty large positive
4258 offsets, since one bit is available for free. Negative
4259 offsets are limited by the size of NULL pointer area
4260 specified by the ABI. */
4261 if (ix86_cmodel
== CM_SMALL
4262 && GET_CODE (op2
) == CONST_INT
4263 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
4264 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
4267 /* ??? For the kernel, we may accept adjustment of
4268 -0x10000000, since we know that it will just convert
4269 negative address space to positive, but perhaps this
4270 is not worthwhile. */
4273 /* These conditions are similar to SYMBOL_REF ones, just the
4274 constraints for code models differ. */
4275 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
4276 && GET_CODE (op2
) == CONST_INT
4277 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
4278 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
4292 /* Value should be nonzero if functions must have frame pointers.
4293 Zero means the frame pointer need not be set up (and parms may
4294 be accessed via the stack pointer) in functions that seem suitable. */
4297 ix86_frame_pointer_required ()
4299 /* If we accessed previous frames, then the generated code expects
4300 to be able to access the saved ebp value in our frame. */
4301 if (cfun
->machine
->accesses_prev_frame
)
4304 /* Several x86 os'es need a frame pointer for other reasons,
4305 usually pertaining to setjmp. */
4306 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
4309 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4310 the frame pointer by default. Turn it back on now if we've not
4311 got a leaf function. */
4312 if (TARGET_OMIT_LEAF_FRAME_POINTER
4313 && (!current_function_is_leaf
))
4316 if (current_function_profile
)
4322 /* Record that the current function accesses previous call frames. */
4325 ix86_setup_frame_addresses ()
4327 cfun
->machine
->accesses_prev_frame
= 1;
4330 #if defined(HAVE_GAS_HIDDEN) && (defined(SUPPORTS_ONE_ONLY) && SUPPORTS_ONE_ONLY)
4331 # define USE_HIDDEN_LINKONCE 1
4333 # define USE_HIDDEN_LINKONCE 0
4336 static int pic_labels_used
;
4338 /* Fills in the label name that should be used for a pc thunk for
4339 the given register. */
4342 get_pc_thunk_name (name
, regno
)
4346 if (USE_HIDDEN_LINKONCE
)
4347 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
4349 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
4353 /* This function generates code for -fpic that loads %ebx with
4354 the return address of the caller and then returns. */
4357 ix86_asm_file_end (file
)
4363 for (regno
= 0; regno
< 8; ++regno
)
4367 if (! ((pic_labels_used
>> regno
) & 1))
4370 get_pc_thunk_name (name
, regno
);
4372 if (USE_HIDDEN_LINKONCE
)
4376 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
4378 TREE_PUBLIC (decl
) = 1;
4379 TREE_STATIC (decl
) = 1;
4380 DECL_ONE_ONLY (decl
) = 1;
4382 (*targetm
.asm_out
.unique_section
) (decl
, 0);
4383 named_section (decl
, NULL
, 0);
4385 (*targetm
.asm_out
.globalize_label
) (file
, name
);
4386 fputs ("\t.hidden\t", file
);
4387 assemble_name (file
, name
);
4389 ASM_DECLARE_FUNCTION_NAME (file
, name
, decl
);
4394 ASM_OUTPUT_LABEL (file
, name
);
4397 xops
[0] = gen_rtx_REG (SImode
, regno
);
4398 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
4399 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
4400 output_asm_insn ("ret", xops
);
4404 /* Emit code for the SET_GOT patterns. */
4407 output_set_got (dest
)
4413 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
4415 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
4417 xops
[2] = gen_rtx_LABEL_REF (Pmode
, gen_label_rtx ());
4420 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
4422 output_asm_insn ("call\t%a2", xops
);
4425 /* Output the "canonical" label name ("Lxx$pb") here too. This
4426 is what will be referred to by the Mach-O PIC subsystem. */
4427 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
4429 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, "L",
4430 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
4433 output_asm_insn ("pop{l}\t%0", xops
);
4438 get_pc_thunk_name (name
, REGNO (dest
));
4439 pic_labels_used
|= 1 << REGNO (dest
);
4441 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
4442 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
4443 output_asm_insn ("call\t%X2", xops
);
4446 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
4447 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
4448 else if (!TARGET_MACHO
)
4449 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops
);
4454 /* Generate an "push" pattern for input ARG. */
4460 return gen_rtx_SET (VOIDmode
,
4462 gen_rtx_PRE_DEC (Pmode
,
4463 stack_pointer_rtx
)),
4467 /* Return >= 0 if there is an unused call-clobbered register available
4468 for the entire function. */
4471 ix86_select_alt_pic_regnum ()
4473 if (current_function_is_leaf
&& !current_function_profile
)
4476 for (i
= 2; i
>= 0; --i
)
4477 if (!regs_ever_live
[i
])
4481 return INVALID_REGNUM
;
4484 /* Return 1 if we need to save REGNO. */
4486 ix86_save_reg (regno
, maybe_eh_return
)
4488 int maybe_eh_return
;
4490 if (pic_offset_table_rtx
4491 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
4492 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4493 || current_function_profile
4494 || current_function_calls_eh_return
4495 || current_function_uses_const_pool
))
4497 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
4502 if (current_function_calls_eh_return
&& maybe_eh_return
)
4507 unsigned test
= EH_RETURN_DATA_REGNO (i
);
4508 if (test
== INVALID_REGNUM
)
4515 return (regs_ever_live
[regno
]
4516 && !call_used_regs
[regno
]
4517 && !fixed_regs
[regno
]
4518 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
4521 /* Return number of registers to be saved on the stack. */
4529 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4530 if (ix86_save_reg (regno
, true))
4535 /* Return the offset between two registers, one to be eliminated, and the other
4536 its replacement, at the start of a routine. */
4539 ix86_initial_elimination_offset (from
, to
)
4543 struct ix86_frame frame
;
4544 ix86_compute_frame_layout (&frame
);
4546 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4547 return frame
.hard_frame_pointer_offset
;
4548 else if (from
== FRAME_POINTER_REGNUM
4549 && to
== HARD_FRAME_POINTER_REGNUM
)
4550 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
4553 if (to
!= STACK_POINTER_REGNUM
)
4555 else if (from
== ARG_POINTER_REGNUM
)
4556 return frame
.stack_pointer_offset
;
4557 else if (from
!= FRAME_POINTER_REGNUM
)
4560 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
4564 /* Fill structure ix86_frame about frame of currently computed function. */
4567 ix86_compute_frame_layout (frame
)
4568 struct ix86_frame
*frame
;
4570 HOST_WIDE_INT total_size
;
4571 int stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
4573 int preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
4574 HOST_WIDE_INT size
= get_frame_size ();
4576 frame
->nregs
= ix86_nsaved_regs ();
4579 /* Skip return address and saved base pointer. */
4580 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
4582 frame
->hard_frame_pointer_offset
= offset
;
4584 /* Do some sanity checking of stack_alignment_needed and
4585 preferred_alignment, since i386 port is the only using those features
4586 that may break easily. */
4588 if (size
&& !stack_alignment_needed
)
4590 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4592 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4594 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4597 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4598 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
4600 /* Register save area */
4601 offset
+= frame
->nregs
* UNITS_PER_WORD
;
4604 if (ix86_save_varrargs_registers
)
4606 offset
+= X86_64_VARARGS_SIZE
;
4607 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
4610 frame
->va_arg_size
= 0;
4612 /* Align start of frame for local function. */
4613 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
4614 & -stack_alignment_needed
) - offset
;
4616 offset
+= frame
->padding1
;
4618 /* Frame pointer points here. */
4619 frame
->frame_pointer_offset
= offset
;
4623 /* Add outgoing arguments area. Can be skipped if we eliminated
4624 all the function calls as dead code. */
4625 if (ACCUMULATE_OUTGOING_ARGS
&& !current_function_is_leaf
)
4627 offset
+= current_function_outgoing_args_size
;
4628 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
4631 frame
->outgoing_arguments_size
= 0;
4633 /* Align stack boundary. Only needed if we're calling another function
4635 if (!current_function_is_leaf
|| current_function_calls_alloca
)
4636 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
4637 & -preferred_alignment
) - offset
;
4639 frame
->padding2
= 0;
4641 offset
+= frame
->padding2
;
4643 /* We've reached end of stack frame. */
4644 frame
->stack_pointer_offset
= offset
;
4646 /* Size prologue needs to allocate. */
4647 frame
->to_allocate
=
4648 (size
+ frame
->padding1
+ frame
->padding2
4649 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
4651 if (TARGET_64BIT
&& TARGET_RED_ZONE
&& current_function_sp_is_unchanging
4652 && current_function_is_leaf
)
4654 frame
->red_zone_size
= frame
->to_allocate
;
4655 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
4656 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
4659 frame
->red_zone_size
= 0;
4660 frame
->to_allocate
-= frame
->red_zone_size
;
4661 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
4663 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
4664 fprintf (stderr
, "size: %i\n", size
);
4665 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
4666 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
4667 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
4668 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
4669 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
4670 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
4671 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
4672 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
4673 frame
->hard_frame_pointer_offset
);
4674 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
4678 /* Emit code to save registers in the prologue. */
4681 ix86_emit_save_regs ()
4686 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4687 if (ix86_save_reg (regno
, true))
4689 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
4690 RTX_FRAME_RELATED_P (insn
) = 1;
4694 /* Emit code to save registers using MOV insns. First register
4695 is restored from POINTER + OFFSET. */
4697 ix86_emit_save_regs_using_mov (pointer
, offset
)
4699 HOST_WIDE_INT offset
;
4704 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4705 if (ix86_save_reg (regno
, true))
4707 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
4709 gen_rtx_REG (Pmode
, regno
));
4710 RTX_FRAME_RELATED_P (insn
) = 1;
4711 offset
+= UNITS_PER_WORD
;
4715 /* Expand the prologue into a bunch of separate insns. */
4718 ix86_expand_prologue ()
4722 struct ix86_frame frame
;
4724 HOST_WIDE_INT allocate
;
4728 use_fast_prologue_epilogue
4729 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT
);
4730 if (TARGET_PROLOGUE_USING_MOVE
)
4731 use_mov
= use_fast_prologue_epilogue
;
4733 ix86_compute_frame_layout (&frame
);
4735 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4736 slower on all targets. Also sdb doesn't like it. */
4738 if (frame_pointer_needed
)
4740 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
4741 RTX_FRAME_RELATED_P (insn
) = 1;
4743 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
4744 RTX_FRAME_RELATED_P (insn
) = 1;
4747 allocate
= frame
.to_allocate
;
4748 /* In case we are dealing only with single register and empty frame,
4749 push is equivalent of the mov+add sequence. */
4750 if (allocate
== 0 && frame
.nregs
<= 1)
4754 ix86_emit_save_regs ();
4756 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
4760 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
4762 insn
= emit_insn (gen_pro_epilogue_adjust_stack
4763 (stack_pointer_rtx
, stack_pointer_rtx
,
4764 GEN_INT (-allocate
)));
4765 RTX_FRAME_RELATED_P (insn
) = 1;
4769 /* Only valid for Win32. */
4770 rtx eax
= gen_rtx_REG (SImode
, 0);
4771 bool eax_live
= ix86_eax_live_at_start_p ();
4778 emit_insn (gen_push (eax
));
4782 insn
= emit_move_insn (eax
, GEN_INT (allocate
));
4783 RTX_FRAME_RELATED_P (insn
) = 1;
4785 insn
= emit_insn (gen_allocate_stack_worker (eax
));
4786 RTX_FRAME_RELATED_P (insn
) = 1;
4790 rtx t
= plus_constant (stack_pointer_rtx
, allocate
);
4791 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
4797 if (!frame_pointer_needed
|| !frame
.to_allocate
)
4798 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
4800 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
4801 -frame
.nregs
* UNITS_PER_WORD
);
4804 #ifdef SUBTARGET_PROLOGUE
4808 pic_reg_used
= false;
4809 if (pic_offset_table_rtx
4810 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4811 || current_function_profile
))
4813 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
4815 if (alt_pic_reg_used
!= INVALID_REGNUM
)
4816 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
4818 pic_reg_used
= true;
4823 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
4825 /* Even with accurate pre-reload life analysis, we can wind up
4826 deleting all references to the pic register after reload.
4827 Consider if cross-jumping unifies two sides of a branch
4828 controled by a comparison vs the only read from a global.
4829 In which case, allow the set_got to be deleted, though we're
4830 too late to do anything about the ebx save in the prologue. */
4831 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
4834 /* Prevent function calls from be scheduled before the call to mcount.
4835 In the pic_reg_used case, make sure that the got load isn't deleted. */
4836 if (current_function_profile
)
4837 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
4840 /* Emit code to restore saved registers using MOV insns. First register
4841 is restored from POINTER + OFFSET. */
4843 ix86_emit_restore_regs_using_mov (pointer
, offset
, maybe_eh_return
)
4846 int maybe_eh_return
;
4850 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4851 if (ix86_save_reg (regno
, maybe_eh_return
))
4853 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
4854 adjust_address (gen_rtx_MEM (Pmode
, pointer
),
4856 offset
+= UNITS_PER_WORD
;
4860 /* Restore function stack, frame, and registers. */
4863 ix86_expand_epilogue (style
)
4867 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
4868 struct ix86_frame frame
;
4869 HOST_WIDE_INT offset
;
4871 ix86_compute_frame_layout (&frame
);
4873 /* Calculate start of saved registers relative to ebp. Special care
4874 must be taken for the normal return case of a function using
4875 eh_return: the eax and edx registers are marked as saved, but not
4876 restored along this path. */
4877 offset
= frame
.nregs
;
4878 if (current_function_calls_eh_return
&& style
!= 2)
4880 offset
*= -UNITS_PER_WORD
;
4882 /* If we're only restoring one register and sp is not valid then
4883 using a move instruction to restore the register since it's
4884 less work than reloading sp and popping the register.
4886 The default code result in stack adjustment using add/lea instruction,
4887 while this code results in LEAVE instruction (or discrete equivalent),
4888 so it is profitable in some other cases as well. Especially when there
4889 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4890 and there is exactly one register to pop. This heruistic may need some
4891 tuning in future. */
4892 if ((!sp_valid
&& frame
.nregs
<= 1)
4893 || (TARGET_EPILOGUE_USING_MOVE
4894 && use_fast_prologue_epilogue
4895 && (frame
.nregs
> 1 || frame
.to_allocate
))
4896 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
4897 || (frame_pointer_needed
&& TARGET_USE_LEAVE
4898 && use_fast_prologue_epilogue
&& frame
.nregs
== 1)
4899 || current_function_calls_eh_return
)
4901 /* Restore registers. We can use ebp or esp to address the memory
4902 locations. If both are available, default to ebp, since offsets
4903 are known to be small. Only exception is esp pointing directly to the
4904 end of block of saved registers, where we may simplify addressing
4907 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
4908 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
4909 frame
.to_allocate
, style
== 2);
4911 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
4912 offset
, style
== 2);
4914 /* eh_return epilogues need %ecx added to the stack pointer. */
4917 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
4919 if (frame_pointer_needed
)
4921 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
4922 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
4923 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
4925 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
4926 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
4928 emit_insn (gen_pro_epilogue_adjust_stack
4929 (stack_pointer_rtx
, sa
, const0_rtx
));
4933 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
4934 tmp
= plus_constant (tmp
, (frame
.to_allocate
4935 + frame
.nregs
* UNITS_PER_WORD
));
4936 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
4939 else if (!frame_pointer_needed
)
4940 emit_insn (gen_pro_epilogue_adjust_stack
4941 (stack_pointer_rtx
, stack_pointer_rtx
,
4942 GEN_INT (frame
.to_allocate
4943 + frame
.nregs
* UNITS_PER_WORD
)));
4944 /* If not an i386, mov & pop is faster than "leave". */
4945 else if (TARGET_USE_LEAVE
|| optimize_size
|| !use_fast_prologue_epilogue
)
4946 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4949 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
4950 hard_frame_pointer_rtx
,
4953 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4955 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4960 /* First step is to deallocate the stack frame so that we can
4961 pop the registers. */
4964 if (!frame_pointer_needed
)
4966 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
4967 hard_frame_pointer_rtx
,
4970 else if (frame
.to_allocate
)
4971 emit_insn (gen_pro_epilogue_adjust_stack
4972 (stack_pointer_rtx
, stack_pointer_rtx
,
4973 GEN_INT (frame
.to_allocate
)));
4975 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4976 if (ix86_save_reg (regno
, false))
4979 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
4981 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
4983 if (frame_pointer_needed
)
4985 /* Leave results in shorter dependency chains on CPUs that are
4986 able to grok it fast. */
4987 if (TARGET_USE_LEAVE
)
4988 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4989 else if (TARGET_64BIT
)
4990 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4992 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4996 /* Sibcall epilogues don't want a return instruction. */
5000 if (current_function_pops_args
&& current_function_args_size
)
5002 rtx popc
= GEN_INT (current_function_pops_args
);
5004 /* i386 can only pop 64K bytes. If asked to pop more, pop
5005 return address, do explicit add, and jump indirectly to the
5008 if (current_function_pops_args
>= 65536)
5010 rtx ecx
= gen_rtx_REG (SImode
, 2);
5012 /* There are is no "pascal" calling convention in 64bit ABI. */
5016 emit_insn (gen_popsi1 (ecx
));
5017 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
5018 emit_jump_insn (gen_return_indirect_internal (ecx
));
5021 emit_jump_insn (gen_return_pop_internal (popc
));
5024 emit_jump_insn (gen_return_internal ());
5027 /* Reset from the function's potential modifications. */
5030 ix86_output_function_epilogue (file
, size
)
5031 FILE *file ATTRIBUTE_UNUSED
;
5032 HOST_WIDE_INT size ATTRIBUTE_UNUSED
;
5034 if (pic_offset_table_rtx
)
5035 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
5038 /* Extract the parts of an RTL expression that is a valid memory address
5039 for an instruction. Return 0 if the structure of the address is
5040 grossly off. Return -1 if the address contains ASHIFT, so it is not
5041 strictly valid, but still used for computing length of lea instruction.
5045 ix86_decompose_address (addr
, out
)
5047 struct ix86_address
*out
;
5049 rtx base
= NULL_RTX
;
5050 rtx index
= NULL_RTX
;
5051 rtx disp
= NULL_RTX
;
5052 HOST_WIDE_INT scale
= 1;
5053 rtx scale_rtx
= NULL_RTX
;
5056 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
5058 else if (GET_CODE (addr
) == PLUS
)
5060 rtx op0
= XEXP (addr
, 0);
5061 rtx op1
= XEXP (addr
, 1);
5062 enum rtx_code code0
= GET_CODE (op0
);
5063 enum rtx_code code1
= GET_CODE (op1
);
5065 if (code0
== REG
|| code0
== SUBREG
)
5067 if (code1
== REG
|| code1
== SUBREG
)
5068 index
= op0
, base
= op1
; /* index + base */
5070 base
= op0
, disp
= op1
; /* base + displacement */
5072 else if (code0
== MULT
)
5074 index
= XEXP (op0
, 0);
5075 scale_rtx
= XEXP (op0
, 1);
5076 if (code1
== REG
|| code1
== SUBREG
)
5077 base
= op1
; /* index*scale + base */
5079 disp
= op1
; /* index*scale + disp */
5081 else if (code0
== PLUS
&& GET_CODE (XEXP (op0
, 0)) == MULT
)
5083 index
= XEXP (XEXP (op0
, 0), 0); /* index*scale + base + disp */
5084 scale_rtx
= XEXP (XEXP (op0
, 0), 1);
5085 base
= XEXP (op0
, 1);
5088 else if (code0
== PLUS
)
5090 index
= XEXP (op0
, 0); /* index + base + disp */
5091 base
= XEXP (op0
, 1);
5097 else if (GET_CODE (addr
) == MULT
)
5099 index
= XEXP (addr
, 0); /* index*scale */
5100 scale_rtx
= XEXP (addr
, 1);
5102 else if (GET_CODE (addr
) == ASHIFT
)
5106 /* We're called for lea too, which implements ashift on occasion. */
5107 index
= XEXP (addr
, 0);
5108 tmp
= XEXP (addr
, 1);
5109 if (GET_CODE (tmp
) != CONST_INT
)
5111 scale
= INTVAL (tmp
);
5112 if ((unsigned HOST_WIDE_INT
) scale
> 3)
5118 disp
= addr
; /* displacement */
5120 /* Extract the integral value of scale. */
5123 if (GET_CODE (scale_rtx
) != CONST_INT
)
5125 scale
= INTVAL (scale_rtx
);
5128 /* Allow arg pointer and stack pointer as index if there is not scaling */
5129 if (base
&& index
&& scale
== 1
5130 && (index
== arg_pointer_rtx
|| index
== frame_pointer_rtx
5131 || index
== stack_pointer_rtx
))
5138 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5139 if ((base
== hard_frame_pointer_rtx
5140 || base
== frame_pointer_rtx
5141 || base
== arg_pointer_rtx
) && !disp
)
5144 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5145 Avoid this by transforming to [%esi+0]. */
5146 if (ix86_cpu
== PROCESSOR_K6
&& !optimize_size
5147 && base
&& !index
&& !disp
5149 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
5152 /* Special case: encode reg+reg instead of reg*2. */
5153 if (!base
&& index
&& scale
&& scale
== 2)
5154 base
= index
, scale
= 1;
5156 /* Special case: scaling cannot be encoded without base or displacement. */
5157 if (!base
&& !disp
&& index
&& scale
!= 1)
5168 /* Return cost of the memory address x.
5169 For i386, it is better to use a complex address than let gcc copy
5170 the address into a reg and make a new pseudo. But not if the address
5171 requires to two regs - that would mean more pseudos with longer
5174 ix86_address_cost (x
)
5177 struct ix86_address parts
;
5180 if (!ix86_decompose_address (x
, &parts
))
5183 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
5184 parts
.base
= SUBREG_REG (parts
.base
);
5185 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
5186 parts
.index
= SUBREG_REG (parts
.index
);
5188 /* More complex memory references are better. */
5189 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
5192 /* Attempt to minimize number of registers in the address. */
5194 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
5196 && (!REG_P (parts
.index
)
5197 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
5201 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
5203 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
5204 && parts
.base
!= parts
.index
)
5207 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5208 since it's predecode logic can't detect the length of instructions
5209 and it degenerates to vector decoded. Increase cost of such
5210 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5211 to split such addresses or even refuse such addresses at all.
5213 Following addressing modes are affected:
5218 The first and last case may be avoidable by explicitly coding the zero in
5219 memory address, but I don't have AMD-K6 machine handy to check this
5223 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5224 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5225 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
5231 /* If X is a machine specific address (i.e. a symbol or label being
5232 referenced as a displacement from the GOT implemented using an
5233 UNSPEC), then return the base term. Otherwise return X. */
5236 ix86_find_base_term (x
)
5243 if (GET_CODE (x
) != CONST
)
5246 if (GET_CODE (term
) == PLUS
5247 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
5248 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
5249 term
= XEXP (term
, 0);
5250 if (GET_CODE (term
) != UNSPEC
5251 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
5254 term
= XVECEXP (term
, 0, 0);
5256 if (GET_CODE (term
) != SYMBOL_REF
5257 && GET_CODE (term
) != LABEL_REF
)
5263 if (GET_CODE (x
) != PLUS
5264 || XEXP (x
, 0) != pic_offset_table_rtx
5265 || GET_CODE (XEXP (x
, 1)) != CONST
)
5268 term
= XEXP (XEXP (x
, 1), 0);
5270 if (GET_CODE (term
) == PLUS
&& GET_CODE (XEXP (term
, 1)) == CONST_INT
)
5271 term
= XEXP (term
, 0);
5273 if (GET_CODE (term
) != UNSPEC
5274 || XINT (term
, 1) != UNSPEC_GOTOFF
)
5277 term
= XVECEXP (term
, 0, 0);
5279 if (GET_CODE (term
) != SYMBOL_REF
5280 && GET_CODE (term
) != LABEL_REF
)
5286 /* Determine if a given RTX is a valid constant. We already know this
5287 satisfies CONSTANT_P. */
5290 legitimate_constant_p (x
)
5295 switch (GET_CODE (x
))
5298 /* TLS symbols are not constant. */
5299 if (tls_symbolic_operand (x
, Pmode
))
5304 inner
= XEXP (x
, 0);
5306 /* Offsets of TLS symbols are never valid.
5307 Discourage CSE from creating them. */
5308 if (GET_CODE (inner
) == PLUS
5309 && tls_symbolic_operand (XEXP (inner
, 0), Pmode
))
5312 /* Only some unspecs are valid as "constants". */
5313 if (GET_CODE (inner
) == UNSPEC
)
5314 switch (XINT (inner
, 1))
5317 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5327 /* Otherwise we handle everything else in the move patterns. */
5331 /* Determine if it's legal to put X into the constant pool. This
5332 is not possible for the address of thread-local symbols, which
5333 is checked above. */
5336 ix86_cannot_force_const_mem (x
)
5339 return !legitimate_constant_p (x
);
5342 /* Determine if a given RTX is a valid constant address. */
5345 constant_address_p (x
)
5348 switch (GET_CODE (x
))
5355 return TARGET_64BIT
;
5358 /* For Mach-O, really believe the CONST. */
5361 /* Otherwise fall through. */
5363 return !flag_pic
&& legitimate_constant_p (x
);
5370 /* Nonzero if the constant value X is a legitimate general operand
5371 when generating PIC code. It is given that flag_pic is on and
5372 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5375 legitimate_pic_operand_p (x
)
5380 switch (GET_CODE (x
))
5383 inner
= XEXP (x
, 0);
5385 /* Only some unspecs are valid as "constants". */
5386 if (GET_CODE (inner
) == UNSPEC
)
5387 switch (XINT (inner
, 1))
5390 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5398 return legitimate_pic_address_disp_p (x
);
5405 /* Determine if a given CONST RTX is a valid memory displacement
5409 legitimate_pic_address_disp_p (disp
)
5414 /* In 64bit mode we can allow direct addresses of symbols and labels
5415 when they are not dynamic symbols. */
5418 /* TLS references should always be enclosed in UNSPEC. */
5419 if (tls_symbolic_operand (disp
, GET_MODE (disp
)))
5421 if (GET_CODE (disp
) == SYMBOL_REF
5422 && ix86_cmodel
== CM_SMALL_PIC
5423 && (CONSTANT_POOL_ADDRESS_P (disp
)
5424 || SYMBOL_REF_FLAG (disp
)))
5426 if (GET_CODE (disp
) == LABEL_REF
)
5428 if (GET_CODE (disp
) == CONST
5429 && GET_CODE (XEXP (disp
, 0)) == PLUS
5430 && ((GET_CODE (XEXP (XEXP (disp
, 0), 0)) == SYMBOL_REF
5431 && ix86_cmodel
== CM_SMALL_PIC
5432 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp
, 0), 0))
5433 || SYMBOL_REF_FLAG (XEXP (XEXP (disp
, 0), 0))))
5434 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) == LABEL_REF
)
5435 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
5436 && INTVAL (XEXP (XEXP (disp
, 0), 1)) < 16*1024*1024
5437 && INTVAL (XEXP (XEXP (disp
, 0), 1)) >= -16*1024*1024)
5440 if (GET_CODE (disp
) != CONST
)
5442 disp
= XEXP (disp
, 0);
5446 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5447 of GOT tables. We should not need these anyway. */
5448 if (GET_CODE (disp
) != UNSPEC
5449 || XINT (disp
, 1) != UNSPEC_GOTPCREL
)
5452 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
5453 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
5459 if (GET_CODE (disp
) == PLUS
)
5461 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
5463 disp
= XEXP (disp
, 0);
5467 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5468 if (TARGET_MACHO
&& GET_CODE (disp
) == MINUS
)
5470 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
5471 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
5472 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
5474 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
5475 if (strstr (sym_name
, "$pb") != 0)
5480 if (GET_CODE (disp
) != UNSPEC
)
5483 switch (XINT (disp
, 1))
5488 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
5490 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5491 case UNSPEC_GOTTPOFF
:
5492 case UNSPEC_GOTNTPOFF
:
5493 case UNSPEC_INDNTPOFF
:
5496 return initial_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5498 return local_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5500 return local_dynamic_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5506 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5507 memory address for an instruction. The MODE argument is the machine mode
5508 for the MEM expression that wants to use this address.
5510 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5511 convert common non-canonical forms to canonical form so that they will
5515 legitimate_address_p (mode
, addr
, strict
)
5516 enum machine_mode mode
;
5520 struct ix86_address parts
;
5521 rtx base
, index
, disp
;
5522 HOST_WIDE_INT scale
;
5523 const char *reason
= NULL
;
5524 rtx reason_rtx
= NULL_RTX
;
5526 if (TARGET_DEBUG_ADDR
)
5529 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5530 GET_MODE_NAME (mode
), strict
);
5534 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_TP
)
5536 if (TARGET_DEBUG_ADDR
)
5537 fprintf (stderr
, "Success.\n");
5541 if (ix86_decompose_address (addr
, &parts
) <= 0)
5543 reason
= "decomposition failed";
5548 index
= parts
.index
;
5550 scale
= parts
.scale
;
5552 /* Validate base register.
5554 Don't allow SUBREG's here, it can lead to spill failures when the base
5555 is one word out of a two word structure, which is represented internally
5563 if (GET_CODE (base
) == SUBREG
)
5564 reg
= SUBREG_REG (base
);
5568 if (GET_CODE (reg
) != REG
)
5570 reason
= "base is not a register";
5574 if (GET_MODE (base
) != Pmode
)
5576 reason
= "base is not in Pmode";
5580 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
5581 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
5583 reason
= "base is not valid";
5588 /* Validate index register.
5590 Don't allow SUBREG's here, it can lead to spill failures when the index
5591 is one word out of a two word structure, which is represented internally
5599 if (GET_CODE (index
) == SUBREG
)
5600 reg
= SUBREG_REG (index
);
5604 if (GET_CODE (reg
) != REG
)
5606 reason
= "index is not a register";
5610 if (GET_MODE (index
) != Pmode
)
5612 reason
= "index is not in Pmode";
5616 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
5617 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
5619 reason
= "index is not valid";
5624 /* Validate scale factor. */
5627 reason_rtx
= GEN_INT (scale
);
5630 reason
= "scale without index";
5634 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
5636 reason
= "scale is not a valid multiplier";
5641 /* Validate displacement. */
5646 if (GET_CODE (disp
) == CONST
5647 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
5648 switch (XINT (XEXP (disp
, 0), 1))
5652 case UNSPEC_GOTPCREL
:
5655 goto is_legitimate_pic
;
5657 case UNSPEC_GOTTPOFF
:
5658 case UNSPEC_GOTNTPOFF
:
5659 case UNSPEC_INDNTPOFF
:
5665 reason
= "invalid address unspec";
5669 else if (flag_pic
&& (SYMBOLIC_CONST (disp
)
5671 && !machopic_operand_p (disp
)
5676 if (TARGET_64BIT
&& (index
|| base
))
5678 /* foo@dtpoff(%rX) is ok. */
5679 if (GET_CODE (disp
) != CONST
5680 || GET_CODE (XEXP (disp
, 0)) != PLUS
5681 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
5682 || GET_CODE (XEXP (XEXP (disp
, 0), 1)) != CONST_INT
5683 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
5684 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
5686 reason
= "non-constant pic memory reference";
5690 else if (! legitimate_pic_address_disp_p (disp
))
5692 reason
= "displacement is an invalid pic construct";
5696 /* This code used to verify that a symbolic pic displacement
5697 includes the pic_offset_table_rtx register.
5699 While this is good idea, unfortunately these constructs may
5700 be created by "adds using lea" optimization for incorrect
5709 This code is nonsensical, but results in addressing
5710 GOT table with pic_offset_table_rtx base. We can't
5711 just refuse it easily, since it gets matched by
5712 "addsi3" pattern, that later gets split to lea in the
5713 case output register differs from input. While this
5714 can be handled by separate addsi pattern for this case
5715 that never results in lea, this seems to be easier and
5716 correct fix for crash to disable this test. */
5718 else if (!CONSTANT_ADDRESS_P (disp
))
5720 reason
= "displacement is not constant";
5723 else if (TARGET_64BIT
&& !x86_64_sign_extended_value (disp
))
5725 reason
= "displacement is out of range";
5728 else if (!TARGET_64BIT
&& GET_CODE (disp
) == CONST_DOUBLE
)
5730 reason
= "displacement is a const_double";
5735 /* Everything looks valid. */
5736 if (TARGET_DEBUG_ADDR
)
5737 fprintf (stderr
, "Success.\n");
5741 if (TARGET_DEBUG_ADDR
)
5743 fprintf (stderr
, "Error: %s\n", reason
);
5744 debug_rtx (reason_rtx
);
5749 /* Return an unique alias set for the GOT. */
5751 static HOST_WIDE_INT
5752 ix86_GOT_alias_set ()
5754 static HOST_WIDE_INT set
= -1;
5756 set
= new_alias_set ();
5760 /* Return a legitimate reference for ORIG (an address) using the
5761 register REG. If REG is 0, a new pseudo is generated.
5763 There are two types of references that must be handled:
5765 1. Global data references must load the address from the GOT, via
5766 the PIC reg. An insn is emitted to do this load, and the reg is
5769 2. Static data references, constant pool addresses, and code labels
5770 compute the address as an offset from the GOT, whose base is in
5771 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5772 differentiate them from global data objects. The returned
5773 address is the PIC reg + an unspec constant.
5775 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5776 reg also appears in the address. */
5779 legitimize_pic_address (orig
, reg
)
5789 reg
= gen_reg_rtx (Pmode
);
5790 /* Use the generic Mach-O PIC machinery. */
5791 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
5794 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
5796 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
5798 /* This symbol may be referenced via a displacement from the PIC
5799 base address (@GOTOFF). */
5801 if (reload_in_progress
)
5802 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5803 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
5804 new = gen_rtx_CONST (Pmode
, new);
5805 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5809 emit_move_insn (reg
, new);
5813 else if (GET_CODE (addr
) == SYMBOL_REF
)
5817 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
5818 new = gen_rtx_CONST (Pmode
, new);
5819 new = gen_rtx_MEM (Pmode
, new);
5820 RTX_UNCHANGING_P (new) = 1;
5821 set_mem_alias_set (new, ix86_GOT_alias_set ());
5824 reg
= gen_reg_rtx (Pmode
);
5825 /* Use directly gen_movsi, otherwise the address is loaded
5826 into register for CSE. We don't want to CSE this addresses,
5827 instead we CSE addresses from the GOT table, so skip this. */
5828 emit_insn (gen_movsi (reg
, new));
5833 /* This symbol must be referenced via a load from the
5834 Global Offset Table (@GOT). */
5836 if (reload_in_progress
)
5837 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5838 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
5839 new = gen_rtx_CONST (Pmode
, new);
5840 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5841 new = gen_rtx_MEM (Pmode
, new);
5842 RTX_UNCHANGING_P (new) = 1;
5843 set_mem_alias_set (new, ix86_GOT_alias_set ());
5846 reg
= gen_reg_rtx (Pmode
);
5847 emit_move_insn (reg
, new);
5853 if (GET_CODE (addr
) == CONST
)
5855 addr
= XEXP (addr
, 0);
5857 /* We must match stuff we generate before. Assume the only
5858 unspecs that can get here are ours. Not that we could do
5859 anything with them anyway... */
5860 if (GET_CODE (addr
) == UNSPEC
5861 || (GET_CODE (addr
) == PLUS
5862 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
5864 if (GET_CODE (addr
) != PLUS
)
5867 if (GET_CODE (addr
) == PLUS
)
5869 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
5871 /* Check first to see if this is a constant offset from a @GOTOFF
5872 symbol reference. */
5873 if (local_symbolic_operand (op0
, Pmode
)
5874 && GET_CODE (op1
) == CONST_INT
)
5878 if (reload_in_progress
)
5879 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5880 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
5882 new = gen_rtx_PLUS (Pmode
, new, op1
);
5883 new = gen_rtx_CONST (Pmode
, new);
5884 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5888 emit_move_insn (reg
, new);
5894 if (INTVAL (op1
) < -16*1024*1024
5895 || INTVAL (op1
) >= 16*1024*1024)
5896 new = gen_rtx_PLUS (Pmode
, op0
, force_reg (Pmode
, op1
));
5901 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
5902 new = legitimize_pic_address (XEXP (addr
, 1),
5903 base
== reg
? NULL_RTX
: reg
);
5905 if (GET_CODE (new) == CONST_INT
)
5906 new = plus_constant (base
, INTVAL (new));
5909 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
5911 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
5912 new = XEXP (new, 1);
5914 new = gen_rtx_PLUS (Pmode
, base
, new);
5923 ix86_encode_section_info (decl
, first
)
5925 int first ATTRIBUTE_UNUSED
;
5927 bool local_p
= (*targetm
.binds_local_p
) (decl
);
5930 rtl
= DECL_P (decl
) ? DECL_RTL (decl
) : TREE_CST_RTL (decl
);
5931 if (GET_CODE (rtl
) != MEM
)
5933 symbol
= XEXP (rtl
, 0);
5934 if (GET_CODE (symbol
) != SYMBOL_REF
)
5937 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5938 symbol so that we may access it directly in the GOT. */
5941 SYMBOL_REF_FLAG (symbol
) = local_p
;
5943 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5944 "local dynamic", "initial exec" or "local exec" TLS models
5947 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL (decl
))
5949 const char *symbol_str
;
5952 enum tls_model kind
= decl_tls_model (decl
);
5954 if (TARGET_64BIT
&& ! flag_pic
)
5956 /* x86-64 doesn't allow non-pic code for shared libraries,
5957 so don't generate GD/LD TLS models for non-pic code. */
5960 case TLS_MODEL_GLOBAL_DYNAMIC
:
5961 kind
= TLS_MODEL_INITIAL_EXEC
; break;
5962 case TLS_MODEL_LOCAL_DYNAMIC
:
5963 kind
= TLS_MODEL_LOCAL_EXEC
; break;
5969 symbol_str
= XSTR (symbol
, 0);
5971 if (symbol_str
[0] == '%')
5973 if (symbol_str
[1] == tls_model_chars
[kind
])
5977 len
= strlen (symbol_str
) + 1;
5978 newstr
= alloca (len
+ 2);
5981 newstr
[1] = tls_model_chars
[kind
];
5982 memcpy (newstr
+ 2, symbol_str
, len
);
5984 XSTR (symbol
, 0) = ggc_alloc_string (newstr
, len
+ 2 - 1);
5988 /* Undo the above when printing symbol names. */
5991 ix86_strip_name_encoding (str
)
6001 /* Load the thread pointer into a register. */
6004 get_thread_pointer ()
6008 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
6009 tp
= gen_rtx_MEM (Pmode
, tp
);
6010 RTX_UNCHANGING_P (tp
) = 1;
6011 set_mem_alias_set (tp
, ix86_GOT_alias_set ());
6012 tp
= force_reg (Pmode
, tp
);
6017 /* Try machine-dependent ways of modifying an illegitimate address
6018 to be legitimate. If we find one, return the new, valid address.
6019 This macro is used in only one place: `memory_address' in explow.c.
6021 OLDX is the address as it was before break_out_memory_refs was called.
6022 In some cases it is useful to look at this to decide what needs to be done.
6024 MODE and WIN are passed so that this macro can use
6025 GO_IF_LEGITIMATE_ADDRESS.
6027 It is always safe for this macro to do nothing. It exists to recognize
6028 opportunities to optimize the output.
6030 For the 80386, we handle X+REG by loading X into a register R and
6031 using R+REG. R will go in a general reg and indexing will be used.
6032 However, if REG is a broken-out memory address or multiplication,
6033 nothing needs to be done because REG can certainly go in a general reg.
6035 When -fpic is used, special handling is needed for symbolic references.
6036 See comments by legitimize_pic_address in i386.c for details. */
6039 legitimize_address (x
, oldx
, mode
)
6041 register rtx oldx ATTRIBUTE_UNUSED
;
6042 enum machine_mode mode
;
6047 if (TARGET_DEBUG_ADDR
)
6049 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6050 GET_MODE_NAME (mode
));
6054 log
= tls_symbolic_operand (x
, mode
);
6057 rtx dest
, base
, off
, pic
;
6062 case TLS_MODEL_GLOBAL_DYNAMIC
:
6063 dest
= gen_reg_rtx (Pmode
);
6066 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
6069 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
6070 insns
= get_insns ();
6073 emit_libcall_block (insns
, dest
, rax
, x
);
6076 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
6079 case TLS_MODEL_LOCAL_DYNAMIC
:
6080 base
= gen_reg_rtx (Pmode
);
6083 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
6086 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
6087 insns
= get_insns ();
6090 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
6091 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
6092 emit_libcall_block (insns
, base
, rax
, note
);
6095 emit_insn (gen_tls_local_dynamic_base_32 (base
));
6097 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
6098 off
= gen_rtx_CONST (Pmode
, off
);
6100 return gen_rtx_PLUS (Pmode
, base
, off
);
6102 case TLS_MODEL_INITIAL_EXEC
:
6106 type
= UNSPEC_GOTNTPOFF
;
6110 if (reload_in_progress
)
6111 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6112 pic
= pic_offset_table_rtx
;
6113 type
= TARGET_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
6115 else if (!TARGET_GNU_TLS
)
6117 pic
= gen_reg_rtx (Pmode
);
6118 emit_insn (gen_set_got (pic
));
6119 type
= UNSPEC_GOTTPOFF
;
6124 type
= UNSPEC_INDNTPOFF
;
6127 base
= get_thread_pointer ();
6129 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
6130 off
= gen_rtx_CONST (Pmode
, off
);
6132 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
6133 off
= gen_rtx_MEM (Pmode
, off
);
6134 RTX_UNCHANGING_P (off
) = 1;
6135 set_mem_alias_set (off
, ix86_GOT_alias_set ());
6136 dest
= gen_reg_rtx (Pmode
);
6138 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
6140 emit_move_insn (dest
, off
);
6141 return gen_rtx_PLUS (Pmode
, base
, dest
);
6144 emit_insn (gen_subsi3 (dest
, base
, off
));
6147 case TLS_MODEL_LOCAL_EXEC
:
6148 base
= get_thread_pointer ();
6150 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
6151 (TARGET_64BIT
|| TARGET_GNU_TLS
)
6152 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
6153 off
= gen_rtx_CONST (Pmode
, off
);
6155 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
6156 return gen_rtx_PLUS (Pmode
, base
, off
);
6159 dest
= gen_reg_rtx (Pmode
);
6160 emit_insn (gen_subsi3 (dest
, base
, off
));
6171 if (flag_pic
&& SYMBOLIC_CONST (x
))
6172 return legitimize_pic_address (x
, 0);
6174 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6175 if (GET_CODE (x
) == ASHIFT
6176 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6177 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
6180 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
6181 GEN_INT (1 << log
));
6184 if (GET_CODE (x
) == PLUS
)
6186 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6188 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
6189 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
6190 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
6193 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
6194 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
6195 GEN_INT (1 << log
));
6198 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
6199 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
6200 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
6203 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
6204 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
6205 GEN_INT (1 << log
));
6208 /* Put multiply first if it isn't already. */
6209 if (GET_CODE (XEXP (x
, 1)) == MULT
)
6211 rtx tmp
= XEXP (x
, 0);
6212 XEXP (x
, 0) = XEXP (x
, 1);
6217 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6218 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6219 created by virtual register instantiation, register elimination, and
6220 similar optimizations. */
6221 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
6224 x
= gen_rtx_PLUS (Pmode
,
6225 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
6226 XEXP (XEXP (x
, 1), 0)),
6227 XEXP (XEXP (x
, 1), 1));
6231 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6232 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6233 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
6234 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
6235 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
6236 && CONSTANT_P (XEXP (x
, 1)))
6239 rtx other
= NULL_RTX
;
6241 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6243 constant
= XEXP (x
, 1);
6244 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
6246 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
6248 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
6249 other
= XEXP (x
, 1);
6257 x
= gen_rtx_PLUS (Pmode
,
6258 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
6259 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
6260 plus_constant (other
, INTVAL (constant
)));
6264 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
6267 if (GET_CODE (XEXP (x
, 0)) == MULT
)
6270 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
6273 if (GET_CODE (XEXP (x
, 1)) == MULT
)
6276 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
6280 && GET_CODE (XEXP (x
, 1)) == REG
6281 && GET_CODE (XEXP (x
, 0)) == REG
)
6284 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
6287 x
= legitimize_pic_address (x
, 0);
6290 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
6293 if (GET_CODE (XEXP (x
, 0)) == REG
)
6295 register rtx temp
= gen_reg_rtx (Pmode
);
6296 register rtx val
= force_operand (XEXP (x
, 1), temp
);
6298 emit_move_insn (temp
, val
);
6304 else if (GET_CODE (XEXP (x
, 1)) == REG
)
6306 register rtx temp
= gen_reg_rtx (Pmode
);
6307 register rtx val
= force_operand (XEXP (x
, 0), temp
);
6309 emit_move_insn (temp
, val
);
6319 /* Print an integer constant expression in assembler syntax. Addition
6320 and subtraction are the only arithmetic that may appear in these
6321 expressions. FILE is the stdio stream to write to, X is the rtx, and
6322 CODE is the operand print code from the output string. */
6325 output_pic_addr_const (file
, x
, code
)
6332 switch (GET_CODE (x
))
6342 assemble_name (file
, XSTR (x
, 0));
6343 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_FLAG (x
))
6344 fputs ("@PLT", file
);
6351 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
6352 assemble_name (asm_out_file
, buf
);
6356 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6360 /* This used to output parentheses around the expression,
6361 but that does not work on the 386 (either ATT or BSD assembler). */
6362 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6366 if (GET_MODE (x
) == VOIDmode
)
6368 /* We can use %d if the number is <32 bits and positive. */
6369 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
6370 fprintf (file
, "0x%lx%08lx",
6371 (unsigned long) CONST_DOUBLE_HIGH (x
),
6372 (unsigned long) CONST_DOUBLE_LOW (x
));
6374 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
6377 /* We can't handle floating point constants;
6378 PRINT_OPERAND must handle them. */
6379 output_operand_lossage ("floating constant misused");
6383 /* Some assemblers need integer constants to appear first. */
6384 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
6386 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6388 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6390 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6392 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6394 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6402 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
6403 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6405 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6407 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
6411 if (XVECLEN (x
, 0) != 1)
6413 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
6414 switch (XINT (x
, 1))
6417 fputs ("@GOT", file
);
6420 fputs ("@GOTOFF", file
);
6422 case UNSPEC_GOTPCREL
:
6423 fputs ("@GOTPCREL(%rip)", file
);
6425 case UNSPEC_GOTTPOFF
:
6426 /* FIXME: This might be @TPOFF in Sun ld too. */
6427 fputs ("@GOTTPOFF", file
);
6430 fputs ("@TPOFF", file
);
6434 fputs ("@TPOFF", file
);
6436 fputs ("@NTPOFF", file
);
6439 fputs ("@DTPOFF", file
);
6441 case UNSPEC_GOTNTPOFF
:
6443 fputs ("@GOTTPOFF(%rip)", file
);
6445 fputs ("@GOTNTPOFF", file
);
6447 case UNSPEC_INDNTPOFF
:
6448 fputs ("@INDNTPOFF", file
);
6451 output_operand_lossage ("invalid UNSPEC as operand");
6457 output_operand_lossage ("invalid expression as operand");
6461 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6462 We need to handle our special PIC relocations. */
6465 i386_dwarf_output_addr_const (file
, x
)
6470 fprintf (file
, "%s", TARGET_64BIT
? ASM_QUAD
: ASM_LONG
);
6474 fprintf (file
, "%s", ASM_LONG
);
6477 output_pic_addr_const (file
, x
, '\0');
6479 output_addr_const (file
, x
);
6483 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6484 We need to emit DTP-relative relocations. */
6487 i386_output_dwarf_dtprel (file
, size
, x
)
6492 fputs (ASM_LONG
, file
);
6493 output_addr_const (file
, x
);
6494 fputs ("@DTPOFF", file
);
6500 fputs (", 0", file
);
6507 /* In the name of slightly smaller debug output, and to cater to
6508 general assembler lossage, recognize PIC+GOTOFF and turn it back
6509 into a direct symbol reference. */
6512 i386_simplify_dwarf_addr (orig_x
)
6517 if (GET_CODE (x
) == MEM
)
6522 if (GET_CODE (x
) != CONST
6523 || GET_CODE (XEXP (x
, 0)) != UNSPEC
6524 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
6525 || GET_CODE (orig_x
) != MEM
)
6527 return XVECEXP (XEXP (x
, 0), 0, 0);
6530 if (GET_CODE (x
) != PLUS
6531 || GET_CODE (XEXP (x
, 1)) != CONST
)
6534 if (GET_CODE (XEXP (x
, 0)) == REG
6535 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
6536 /* %ebx + GOT/GOTOFF */
6538 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
6540 /* %ebx + %reg * scale + GOT/GOTOFF */
6542 if (GET_CODE (XEXP (y
, 0)) == REG
6543 && REGNO (XEXP (y
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
6545 else if (GET_CODE (XEXP (y
, 1)) == REG
6546 && REGNO (XEXP (y
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
6550 if (GET_CODE (y
) != REG
6551 && GET_CODE (y
) != MULT
6552 && GET_CODE (y
) != ASHIFT
)
6558 x
= XEXP (XEXP (x
, 1), 0);
6559 if (GET_CODE (x
) == UNSPEC
6560 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
6561 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
6564 return gen_rtx_PLUS (Pmode
, y
, XVECEXP (x
, 0, 0));
6565 return XVECEXP (x
, 0, 0);
6568 if (GET_CODE (x
) == PLUS
6569 && GET_CODE (XEXP (x
, 0)) == UNSPEC
6570 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6571 && ((XINT (XEXP (x
, 0), 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
6572 || (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTOFF
6573 && GET_CODE (orig_x
) != MEM
)))
6575 x
= gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
6577 return gen_rtx_PLUS (Pmode
, y
, x
);
6585 put_condition_code (code
, mode
, reverse
, fp
, file
)
6587 enum machine_mode mode
;
6593 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
6595 enum rtx_code second_code
, bypass_code
;
6596 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
6597 if (bypass_code
!= NIL
|| second_code
!= NIL
)
6599 code
= ix86_fp_compare_code_to_integer (code
);
6603 code
= reverse_condition (code
);
6614 if (mode
!= CCmode
&& mode
!= CCNOmode
&& mode
!= CCGCmode
)
6619 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6620 Those same assemblers have the same but opposite lossage on cmov. */
6623 suffix
= fp
? "nbe" : "a";
6626 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
6628 else if (mode
== CCmode
|| mode
== CCGCmode
)
6639 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
6641 else if (mode
== CCmode
|| mode
== CCGCmode
)
6650 suffix
= fp
? "nb" : "ae";
6653 if (mode
!= CCmode
&& mode
!= CCGCmode
&& mode
!= CCNOmode
)
6663 suffix
= fp
? "u" : "p";
6666 suffix
= fp
? "nu" : "np";
6671 fputs (suffix
, file
);
6675 print_reg (x
, code
, file
)
6680 if (REGNO (x
) == ARG_POINTER_REGNUM
6681 || REGNO (x
) == FRAME_POINTER_REGNUM
6682 || REGNO (x
) == FLAGS_REG
6683 || REGNO (x
) == FPSR_REG
)
6686 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
6689 if (code
== 'w' || MMX_REG_P (x
))
6691 else if (code
== 'b')
6693 else if (code
== 'k')
6695 else if (code
== 'q')
6697 else if (code
== 'y')
6699 else if (code
== 'h')
6702 code
= GET_MODE_SIZE (GET_MODE (x
));
6704 /* Irritatingly, AMD extended registers use a different naming convention
6705 from the normal registers. */
6706 if (REX_INT_REG_P (x
))
6713 error ("extended registers have no high halves");
6716 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6719 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6722 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6725 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6728 error ("unsupported operand size for extended register");
6736 if (STACK_TOP_P (x
))
6738 fputs ("st(0)", file
);
6745 if (! ANY_FP_REG_P (x
))
6746 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
6750 fputs (hi_reg_name
[REGNO (x
)], file
);
6753 fputs (qi_reg_name
[REGNO (x
)], file
);
6756 fputs (qi_high_reg_name
[REGNO (x
)], file
);
6763 /* Locate some local-dynamic symbol still in use by this function
6764 so that we can print its name in some tls_local_dynamic_base
6768 get_some_local_dynamic_name ()
6772 if (cfun
->machine
->some_ld_name
)
6773 return cfun
->machine
->some_ld_name
;
6775 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
6777 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
6778 return cfun
->machine
->some_ld_name
;
6784 get_some_local_dynamic_name_1 (px
, data
)
6786 void *data ATTRIBUTE_UNUSED
;
6790 if (GET_CODE (x
) == SYMBOL_REF
6791 && local_dynamic_symbolic_operand (x
, Pmode
))
6793 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
6801 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6802 C -- print opcode suffix for set/cmov insn.
6803 c -- like C, but print reversed condition
6804 F,f -- likewise, but for floating-point.
6805 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6807 R -- print the prefix for register names.
6808 z -- print the opcode suffix for the size of the current operand.
6809 * -- print a star (in certain assembler syntax)
6810 A -- print an absolute memory reference.
6811 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6812 s -- print a shift double count, followed by the assembler's argument
6814 b -- print the QImode name of the register for the indicated operand.
6815 %b0 would print %al if operands[0] is reg 0.
6816 w -- likewise, print the HImode name of the register.
6817 k -- likewise, print the SImode name of the register.
6818 q -- likewise, print the DImode name of the register.
6819 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6820 y -- print "st(0)" instead of "st" as a register.
6821 D -- print condition for SSE cmp instruction.
6822 P -- if PIC, print an @PLT suffix.
6823 X -- don't print any sort of PIC '@' suffix for a symbol.
6824 & -- print some in-use local-dynamic symbol name.
6828 print_operand (file
, x
, code
)
6838 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6843 assemble_name (file
, get_some_local_dynamic_name ());
6847 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6849 else if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6851 /* Intel syntax. For absolute addresses, registers should not
6852 be surrounded by braces. */
6853 if (GET_CODE (x
) != REG
)
6856 PRINT_OPERAND (file
, x
, 0);
6864 PRINT_OPERAND (file
, x
, 0);
6869 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6874 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6879 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6884 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6889 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6894 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6899 /* 387 opcodes don't get size suffixes if the operands are
6901 if (STACK_REG_P (x
))
6904 /* Likewise if using Intel opcodes. */
6905 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6908 /* This is the size of op from size of operand. */
6909 switch (GET_MODE_SIZE (GET_MODE (x
)))
6912 #ifdef HAVE_GAS_FILDS_FISTS
6918 if (GET_MODE (x
) == SFmode
)
6933 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
6935 #ifdef GAS_MNEMONICS
6961 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
6963 PRINT_OPERAND (file
, x
, 0);
6969 /* Little bit of brain damage here. The SSE compare instructions
6970 use completely different names for the comparisons than the
6971 fp conditional moves do. */
6972 switch (GET_CODE (x
))
6987 fputs ("unord", file
);
6991 fputs ("neq", file
);
6995 fputs ("nlt", file
);
6999 fputs ("nle", file
);
7002 fputs ("ord", file
);
7010 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7011 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7013 switch (GET_MODE (x
))
7015 case HImode
: putc ('w', file
); break;
7017 case SFmode
: putc ('l', file
); break;
7019 case DFmode
: putc ('q', file
); break;
7027 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
7030 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7031 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7034 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
7037 /* Like above, but reverse condition */
7039 /* Check to see if argument to %c is really a constant
7040 and not a condition code which needs to be reversed. */
7041 if (GET_RTX_CLASS (GET_CODE (x
)) != '<')
7043 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7046 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
7049 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7050 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7053 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
7059 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
7062 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
7065 int pred_val
= INTVAL (XEXP (x
, 0));
7067 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
7068 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
7070 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
7071 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
7073 /* Emit hints only in the case default branch prediction
7074 heuristics would fail. */
7075 if (taken
!= cputaken
)
7077 /* We use 3e (DS) prefix for taken branches and
7078 2e (CS) prefix for not taken branches. */
7080 fputs ("ds ; ", file
);
7082 fputs ("cs ; ", file
);
7089 output_operand_lossage ("invalid operand code `%c'", code
);
7093 if (GET_CODE (x
) == REG
)
7095 PRINT_REG (x
, code
, file
);
7098 else if (GET_CODE (x
) == MEM
)
7100 /* No `byte ptr' prefix for call instructions. */
7101 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
7104 switch (GET_MODE_SIZE (GET_MODE (x
)))
7106 case 1: size
= "BYTE"; break;
7107 case 2: size
= "WORD"; break;
7108 case 4: size
= "DWORD"; break;
7109 case 8: size
= "QWORD"; break;
7110 case 12: size
= "XWORD"; break;
7111 case 16: size
= "XMMWORD"; break;
7116 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7119 else if (code
== 'w')
7121 else if (code
== 'k')
7125 fputs (" PTR ", file
);
7129 if (flag_pic
&& CONSTANT_ADDRESS_P (x
))
7130 output_pic_addr_const (file
, x
, code
);
7131 /* Avoid (%rip) for call operands. */
7132 else if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
7133 && GET_CODE (x
) != CONST_INT
)
7134 output_addr_const (file
, x
);
7135 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
7136 output_operand_lossage ("invalid constraints for operand");
7141 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
7146 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
7147 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
7149 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7151 fprintf (file
, "0x%lx", l
);
7154 /* These float cases don't actually occur as immediate operands. */
7155 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
7159 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
7160 fprintf (file
, "%s", dstr
);
7163 else if (GET_CODE (x
) == CONST_DOUBLE
7164 && (GET_MODE (x
) == XFmode
|| GET_MODE (x
) == TFmode
))
7168 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
7169 fprintf (file
, "%s", dstr
);
7176 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
7178 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7181 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
7182 || GET_CODE (x
) == LABEL_REF
)
7184 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7187 fputs ("OFFSET FLAT:", file
);
7190 if (GET_CODE (x
) == CONST_INT
)
7191 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7193 output_pic_addr_const (file
, x
, code
);
7195 output_addr_const (file
, x
);
7199 /* Print a memory operand whose address is ADDR. */
7202 print_operand_address (file
, addr
)
7206 struct ix86_address parts
;
7207 rtx base
, index
, disp
;
7210 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_TP
)
7212 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
7213 fputs ("DWORD PTR ", file
);
7214 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
7217 fputs ("fs:0", file
);
7219 fputs ("gs:0", file
);
7223 if (! ix86_decompose_address (addr
, &parts
))
7227 index
= parts
.index
;
7229 scale
= parts
.scale
;
7231 if (!base
&& !index
)
7233 /* Displacement only requires special attention. */
7235 if (GET_CODE (disp
) == CONST_INT
)
7237 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
7239 if (USER_LABEL_PREFIX
[0] == 0)
7241 fputs ("ds:", file
);
7243 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (addr
));
7246 output_pic_addr_const (file
, addr
, 0);
7248 output_addr_const (file
, addr
);
7250 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7252 && ((GET_CODE (addr
) == SYMBOL_REF
7253 && ! tls_symbolic_operand (addr
, GET_MODE (addr
)))
7254 || GET_CODE (addr
) == LABEL_REF
7255 || (GET_CODE (addr
) == CONST
7256 && GET_CODE (XEXP (addr
, 0)) == PLUS
7257 && (GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
7258 || GET_CODE (XEXP (XEXP (addr
, 0), 0)) == LABEL_REF
)
7259 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)))
7260 fputs ("(%rip)", file
);
7264 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7269 output_pic_addr_const (file
, disp
, 0);
7270 else if (GET_CODE (disp
) == LABEL_REF
)
7271 output_asm_label (disp
);
7273 output_addr_const (file
, disp
);
7278 PRINT_REG (base
, 0, file
);
7282 PRINT_REG (index
, 0, file
);
7284 fprintf (file
, ",%d", scale
);
7290 rtx offset
= NULL_RTX
;
7294 /* Pull out the offset of a symbol; print any symbol itself. */
7295 if (GET_CODE (disp
) == CONST
7296 && GET_CODE (XEXP (disp
, 0)) == PLUS
7297 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
7299 offset
= XEXP (XEXP (disp
, 0), 1);
7300 disp
= gen_rtx_CONST (VOIDmode
,
7301 XEXP (XEXP (disp
, 0), 0));
7305 output_pic_addr_const (file
, disp
, 0);
7306 else if (GET_CODE (disp
) == LABEL_REF
)
7307 output_asm_label (disp
);
7308 else if (GET_CODE (disp
) == CONST_INT
)
7311 output_addr_const (file
, disp
);
7317 PRINT_REG (base
, 0, file
);
7320 if (INTVAL (offset
) >= 0)
7322 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7326 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7333 PRINT_REG (index
, 0, file
);
7335 fprintf (file
, "*%d", scale
);
7343 output_addr_const_extra (file
, x
)
7349 if (GET_CODE (x
) != UNSPEC
)
7352 op
= XVECEXP (x
, 0, 0);
7353 switch (XINT (x
, 1))
7355 case UNSPEC_GOTTPOFF
:
7356 output_addr_const (file
, op
);
7357 /* FIXME: This might be @TPOFF in Sun ld. */
7358 fputs ("@GOTTPOFF", file
);
7361 output_addr_const (file
, op
);
7362 fputs ("@TPOFF", file
);
7365 output_addr_const (file
, op
);
7367 fputs ("@TPOFF", file
);
7369 fputs ("@NTPOFF", file
);
7372 output_addr_const (file
, op
);
7373 fputs ("@DTPOFF", file
);
7375 case UNSPEC_GOTNTPOFF
:
7376 output_addr_const (file
, op
);
7378 fputs ("@GOTTPOFF(%rip)", file
);
7380 fputs ("@GOTNTPOFF", file
);
7382 case UNSPEC_INDNTPOFF
:
7383 output_addr_const (file
, op
);
7384 fputs ("@INDNTPOFF", file
);
7394 /* Split one or more DImode RTL references into pairs of SImode
7395 references. The RTL can be REG, offsettable MEM, integer constant, or
7396 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7397 split and "num" is its length. lo_half and hi_half are output arrays
7398 that parallel "operands". */
7401 split_di (operands
, num
, lo_half
, hi_half
)
7404 rtx lo_half
[], hi_half
[];
7408 rtx op
= operands
[num
];
7410 /* simplify_subreg refuses to split volatile memory addresses,
7411 but we still have to handle it. */
7412 if (GET_CODE (op
) == MEM
)
7414 lo_half
[num
] = adjust_address (op
, SImode
, 0);
7415 hi_half
[num
] = adjust_address (op
, SImode
, 4);
7419 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
7420 GET_MODE (op
) == VOIDmode
7421 ? DImode
: GET_MODE (op
), 0);
7422 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
7423 GET_MODE (op
) == VOIDmode
7424 ? DImode
: GET_MODE (op
), 4);
7428 /* Split one or more TImode RTL references into pairs of SImode
7429 references. The RTL can be REG, offsettable MEM, integer constant, or
7430 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7431 split and "num" is its length. lo_half and hi_half are output arrays
7432 that parallel "operands". */
7435 split_ti (operands
, num
, lo_half
, hi_half
)
7438 rtx lo_half
[], hi_half
[];
7442 rtx op
= operands
[num
];
7444 /* simplify_subreg refuses to split volatile memory addresses, but we
7445 still have to handle it. */
7446 if (GET_CODE (op
) == MEM
)
7448 lo_half
[num
] = adjust_address (op
, DImode
, 0);
7449 hi_half
[num
] = adjust_address (op
, DImode
, 8);
7453 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
7454 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
7459 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7460 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7461 is the expression of the binary operation. The output may either be
7462 emitted here, or returned to the caller, like all output_* functions.
7464 There is no guarantee that the operands are the same mode, as they
7465 might be within FLOAT or FLOAT_EXTEND expressions. */
7467 #ifndef SYSV386_COMPAT
7468 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7469 wants to fix the assemblers because that causes incompatibility
7470 with gcc. No-one wants to fix gcc because that causes
7471 incompatibility with assemblers... You can use the option of
7472 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7473 #define SYSV386_COMPAT 1
7477 output_387_binary_op (insn
, operands
)
7481 static char buf
[30];
7484 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]) | SSE_REG_P (operands
[2]);
7486 #ifdef ENABLE_CHECKING
7487 /* Even if we do not want to check the inputs, this documents input
7488 constraints. Which helps in understanding the following code. */
7489 if (STACK_REG_P (operands
[0])
7490 && ((REG_P (operands
[1])
7491 && REGNO (operands
[0]) == REGNO (operands
[1])
7492 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
7493 || (REG_P (operands
[2])
7494 && REGNO (operands
[0]) == REGNO (operands
[2])
7495 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
7496 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
7502 switch (GET_CODE (operands
[3]))
7505 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7506 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7514 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7515 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7523 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7524 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7532 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7533 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7547 if (GET_MODE (operands
[0]) == SFmode
)
7548 strcat (buf
, "ss\t{%2, %0|%0, %2}");
7550 strcat (buf
, "sd\t{%2, %0|%0, %2}");
7555 switch (GET_CODE (operands
[3]))
7559 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
7561 rtx temp
= operands
[2];
7562 operands
[2] = operands
[1];
7566 /* know operands[0] == operands[1]. */
7568 if (GET_CODE (operands
[2]) == MEM
)
7574 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7576 if (STACK_TOP_P (operands
[0]))
7577 /* How is it that we are storing to a dead operand[2]?
7578 Well, presumably operands[1] is dead too. We can't
7579 store the result to st(0) as st(0) gets popped on this
7580 instruction. Instead store to operands[2] (which I
7581 think has to be st(1)). st(1) will be popped later.
7582 gcc <= 2.8.1 didn't have this check and generated
7583 assembly code that the Unixware assembler rejected. */
7584 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7586 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7590 if (STACK_TOP_P (operands
[0]))
7591 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7593 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7598 if (GET_CODE (operands
[1]) == MEM
)
7604 if (GET_CODE (operands
[2]) == MEM
)
7610 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7613 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7614 derived assemblers, confusingly reverse the direction of
7615 the operation for fsub{r} and fdiv{r} when the
7616 destination register is not st(0). The Intel assembler
7617 doesn't have this brain damage. Read !SYSV386_COMPAT to
7618 figure out what the hardware really does. */
7619 if (STACK_TOP_P (operands
[0]))
7620 p
= "{p\t%0, %2|rp\t%2, %0}";
7622 p
= "{rp\t%2, %0|p\t%0, %2}";
7624 if (STACK_TOP_P (operands
[0]))
7625 /* As above for fmul/fadd, we can't store to st(0). */
7626 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7628 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7633 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
7636 if (STACK_TOP_P (operands
[0]))
7637 p
= "{rp\t%0, %1|p\t%1, %0}";
7639 p
= "{p\t%1, %0|rp\t%0, %1}";
7641 if (STACK_TOP_P (operands
[0]))
7642 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7644 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7649 if (STACK_TOP_P (operands
[0]))
7651 if (STACK_TOP_P (operands
[1]))
7652 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7654 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7657 else if (STACK_TOP_P (operands
[1]))
7660 p
= "{\t%1, %0|r\t%0, %1}";
7662 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7668 p
= "{r\t%2, %0|\t%0, %2}";
7670 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7683 /* Output code to initialize control word copies used by
7684 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7685 is set to control word rounding downwards. */
7687 emit_i387_cw_initialization (normal
, round_down
)
7688 rtx normal
, round_down
;
7690 rtx reg
= gen_reg_rtx (HImode
);
7692 emit_insn (gen_x86_fnstcw_1 (normal
));
7693 emit_move_insn (reg
, normal
);
7694 if (!TARGET_PARTIAL_REG_STALL
&& !optimize_size
7696 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
7698 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0xc00)));
7699 emit_move_insn (round_down
, reg
);
7702 /* Output code for INSN to convert a float to a signed int. OPERANDS
7703 are the insn operands. The output may be [HSD]Imode and the input
7704 operand may be [SDX]Fmode. */
7707 output_fix_trunc (insn
, operands
)
7711 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7712 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
7714 /* Jump through a hoop or two for DImode, since the hardware has no
7715 non-popping instruction. We used to do this a different way, but
7716 that was somewhat fragile and broke with post-reload splitters. */
7717 if (dimode_p
&& !stack_top_dies
)
7718 output_asm_insn ("fld\t%y1", operands
);
7720 if (!STACK_TOP_P (operands
[1]))
7723 if (GET_CODE (operands
[0]) != MEM
)
7726 output_asm_insn ("fldcw\t%3", operands
);
7727 if (stack_top_dies
|| dimode_p
)
7728 output_asm_insn ("fistp%z0\t%0", operands
);
7730 output_asm_insn ("fist%z0\t%0", operands
);
7731 output_asm_insn ("fldcw\t%2", operands
);
7736 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7737 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7738 when fucom should be used. */
7741 output_fp_compare (insn
, operands
, eflags_p
, unordered_p
)
7744 int eflags_p
, unordered_p
;
7747 rtx cmp_op0
= operands
[0];
7748 rtx cmp_op1
= operands
[1];
7749 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]);
7754 cmp_op1
= operands
[2];
7758 if (GET_MODE (operands
[0]) == SFmode
)
7760 return "ucomiss\t{%1, %0|%0, %1}";
7762 return "comiss\t{%1, %0|%0, %1}";
7765 return "ucomisd\t{%1, %0|%0, %1}";
7767 return "comisd\t{%1, %0|%0, %1}";
7770 if (! STACK_TOP_P (cmp_op0
))
7773 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7775 if (STACK_REG_P (cmp_op1
)
7777 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
7778 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
7780 /* If both the top of the 387 stack dies, and the other operand
7781 is also a stack register that dies, then this must be a
7782 `fcompp' float compare */
7786 /* There is no double popping fcomi variant. Fortunately,
7787 eflags is immune from the fstp's cc clobbering. */
7789 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
7791 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
7799 return "fucompp\n\tfnstsw\t%0";
7801 return "fcompp\n\tfnstsw\t%0";
7814 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7816 static const char * const alt
[24] =
7828 "fcomi\t{%y1, %0|%0, %y1}",
7829 "fcomip\t{%y1, %0|%0, %y1}",
7830 "fucomi\t{%y1, %0|%0, %y1}",
7831 "fucomip\t{%y1, %0|%0, %y1}",
7838 "fcom%z2\t%y2\n\tfnstsw\t%0",
7839 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7840 "fucom%z2\t%y2\n\tfnstsw\t%0",
7841 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7843 "ficom%z2\t%y2\n\tfnstsw\t%0",
7844 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7852 mask
= eflags_p
<< 3;
7853 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
7854 mask
|= unordered_p
<< 1;
7855 mask
|= stack_top_dies
;
7868 ix86_output_addr_vec_elt (file
, value
)
7872 const char *directive
= ASM_LONG
;
7877 directive
= ASM_QUAD
;
7883 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
7887 ix86_output_addr_diff_elt (file
, value
, rel
)
7892 fprintf (file
, "%s%s%d-%s%d\n",
7893 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
7894 else if (HAVE_AS_GOTOFF_IN_DATA
)
7895 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
7897 else if (TARGET_MACHO
)
7898 fprintf (file
, "%s%s%d-%s\n", ASM_LONG
, LPREFIX
, value
,
7899 machopic_function_base_name () + 1);
7902 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
7903 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
7906 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7910 ix86_expand_clear (dest
)
7915 /* We play register width games, which are only valid after reload. */
7916 if (!reload_completed
)
7919 /* Avoid HImode and its attendant prefix byte. */
7920 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
7921 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
7923 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
7925 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7926 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
7928 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
7929 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
7935 /* X is an unchanging MEM. If it is a constant pool reference, return
7936 the constant pool rtx, else NULL. */
7939 maybe_get_pool_constant (x
)
7944 if (flag_pic
&& ! TARGET_64BIT
)
7946 if (GET_CODE (x
) != PLUS
)
7948 if (XEXP (x
, 0) != pic_offset_table_rtx
)
7951 if (GET_CODE (x
) != CONST
)
7954 if (GET_CODE (x
) != UNSPEC
)
7956 if (XINT (x
, 1) != UNSPEC_GOTOFF
)
7958 x
= XVECEXP (x
, 0, 0);
7961 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7962 return get_pool_constant (x
);
7968 ix86_expand_move (mode
, operands
)
7969 enum machine_mode mode
;
7972 int strict
= (reload_in_progress
|| reload_completed
);
7973 rtx insn
, op0
, op1
, tmp
;
7978 if (tls_symbolic_operand (op1
, Pmode
))
7980 op1
= legitimize_address (op1
, op1
, VOIDmode
);
7981 if (GET_CODE (op0
) == MEM
)
7983 tmp
= gen_reg_rtx (mode
);
7984 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, op1
));
7988 else if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
7993 rtx temp
= ((reload_in_progress
7994 || ((op0
&& GET_CODE (op0
) == REG
)
7996 ? op0
: gen_reg_rtx (Pmode
));
7997 op1
= machopic_indirect_data_reference (op1
, temp
);
7998 op1
= machopic_legitimize_pic_address (op1
, mode
,
7999 temp
== op1
? 0 : temp
);
8003 if (MACHOPIC_INDIRECT
)
8004 op1
= machopic_indirect_data_reference (op1
, 0);
8008 insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
8012 #endif /* TARGET_MACHO */
8013 if (GET_CODE (op0
) == MEM
)
8014 op1
= force_reg (Pmode
, op1
);
8018 if (GET_CODE (temp
) != REG
)
8019 temp
= gen_reg_rtx (Pmode
);
8020 temp
= legitimize_pic_address (op1
, temp
);
8028 if (GET_CODE (op0
) == MEM
8029 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
8030 || !push_operand (op0
, mode
))
8031 && GET_CODE (op1
) == MEM
)
8032 op1
= force_reg (mode
, op1
);
8034 if (push_operand (op0
, mode
)
8035 && ! general_no_elim_operand (op1
, mode
))
8036 op1
= copy_to_mode_reg (mode
, op1
);
8038 /* Force large constants in 64bit compilation into register
8039 to get them CSEed. */
8040 if (TARGET_64BIT
&& mode
== DImode
8041 && immediate_operand (op1
, mode
)
8042 && !x86_64_zero_extended_value (op1
)
8043 && !register_operand (op0
, mode
)
8044 && optimize
&& !reload_completed
&& !reload_in_progress
)
8045 op1
= copy_to_mode_reg (mode
, op1
);
8047 if (FLOAT_MODE_P (mode
))
8049 /* If we are loading a floating point constant to a register,
8050 force the value to memory now, since we'll get better code
8051 out the back end. */
8055 else if (GET_CODE (op1
) == CONST_DOUBLE
)
8057 op1
= validize_mem (force_const_mem (mode
, op1
));
8058 if (!register_operand (op0
, mode
))
8060 rtx temp
= gen_reg_rtx (mode
);
8061 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
8062 emit_move_insn (op0
, temp
);
8069 insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
8075 ix86_expand_vector_move (mode
, operands
)
8076 enum machine_mode mode
;
8079 /* Force constants other than zero into memory. We do not know how
8080 the instructions used to build constants modify the upper 64 bits
8081 of the register, once we have that information we may be able
8082 to handle some of them more efficiently. */
8083 if ((reload_in_progress
| reload_completed
) == 0
8084 && register_operand (operands
[0], mode
)
8085 && CONSTANT_P (operands
[1]) && operands
[1] != CONST0_RTX (mode
))
8087 operands
[1] = force_const_mem (mode
, operands
[1]);
8088 emit_move_insn (operands
[0], operands
[1]);
8092 /* Make operand1 a register if it isn't already. */
8094 && !register_operand (operands
[0], mode
)
8095 && !register_operand (operands
[1], mode
))
8097 rtx temp
= force_reg (GET_MODE (operands
[1]), operands
[1]);
8098 emit_move_insn (operands
[0], temp
);
8102 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]));
8105 /* Attempt to expand a binary operator. Make the expansion closer to the
8106 actual machine, then just general_operand, which will allow 3 separate
8107 memory references (one output, two input) in a single insn. */
8110 ix86_expand_binary_operator (code
, mode
, operands
)
8112 enum machine_mode mode
;
8115 int matching_memory
;
8116 rtx src1
, src2
, dst
, op
, clob
;
8122 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8123 if (GET_RTX_CLASS (code
) == 'c'
8124 && (rtx_equal_p (dst
, src2
)
8125 || immediate_operand (src1
, mode
)))
8132 /* If the destination is memory, and we do not have matching source
8133 operands, do things in registers. */
8134 matching_memory
= 0;
8135 if (GET_CODE (dst
) == MEM
)
8137 if (rtx_equal_p (dst
, src1
))
8138 matching_memory
= 1;
8139 else if (GET_RTX_CLASS (code
) == 'c'
8140 && rtx_equal_p (dst
, src2
))
8141 matching_memory
= 2;
8143 dst
= gen_reg_rtx (mode
);
8146 /* Both source operands cannot be in memory. */
8147 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
8149 if (matching_memory
!= 2)
8150 src2
= force_reg (mode
, src2
);
8152 src1
= force_reg (mode
, src1
);
8155 /* If the operation is not commutable, source 1 cannot be a constant
8156 or non-matching memory. */
8157 if ((CONSTANT_P (src1
)
8158 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
8159 && GET_RTX_CLASS (code
) != 'c')
8160 src1
= force_reg (mode
, src1
);
8162 /* If optimizing, copy to regs to improve CSE */
8163 if (optimize
&& ! no_new_pseudos
)
8165 if (GET_CODE (dst
) == MEM
)
8166 dst
= gen_reg_rtx (mode
);
8167 if (GET_CODE (src1
) == MEM
)
8168 src1
= force_reg (mode
, src1
);
8169 if (GET_CODE (src2
) == MEM
)
8170 src2
= force_reg (mode
, src2
);
8173 /* Emit the instruction. */
8175 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
8176 if (reload_in_progress
)
8178 /* Reload doesn't know about the flags register, and doesn't know that
8179 it doesn't want to clobber it. We can only do this with PLUS. */
8186 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
8187 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
8190 /* Fix up the destination if needed. */
8191 if (dst
!= operands
[0])
8192 emit_move_insn (operands
[0], dst
);
8195 /* Return TRUE or FALSE depending on whether the binary operator meets the
8196 appropriate constraints. */
8199 ix86_binary_operator_ok (code
, mode
, operands
)
8201 enum machine_mode mode ATTRIBUTE_UNUSED
;
8204 /* Both source operands cannot be in memory. */
8205 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
8207 /* If the operation is not commutable, source 1 cannot be a constant. */
8208 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != 'c')
8210 /* If the destination is memory, we must have a matching source operand. */
8211 if (GET_CODE (operands
[0]) == MEM
8212 && ! (rtx_equal_p (operands
[0], operands
[1])
8213 || (GET_RTX_CLASS (code
) == 'c'
8214 && rtx_equal_p (operands
[0], operands
[2]))))
8216 /* If the operation is not commutable and the source 1 is memory, we must
8217 have a matching destination. */
8218 if (GET_CODE (operands
[1]) == MEM
8219 && GET_RTX_CLASS (code
) != 'c'
8220 && ! rtx_equal_p (operands
[0], operands
[1]))
8225 /* Attempt to expand a unary operator. Make the expansion closer to the
8226 actual machine, then just general_operand, which will allow 2 separate
8227 memory references (one output, one input) in a single insn. */
8230 ix86_expand_unary_operator (code
, mode
, operands
)
8232 enum machine_mode mode
;
8235 int matching_memory
;
8236 rtx src
, dst
, op
, clob
;
8241 /* If the destination is memory, and we do not have matching source
8242 operands, do things in registers. */
8243 matching_memory
= 0;
8244 if (GET_CODE (dst
) == MEM
)
8246 if (rtx_equal_p (dst
, src
))
8247 matching_memory
= 1;
8249 dst
= gen_reg_rtx (mode
);
8252 /* When source operand is memory, destination must match. */
8253 if (!matching_memory
&& GET_CODE (src
) == MEM
)
8254 src
= force_reg (mode
, src
);
8256 /* If optimizing, copy to regs to improve CSE */
8257 if (optimize
&& ! no_new_pseudos
)
8259 if (GET_CODE (dst
) == MEM
)
8260 dst
= gen_reg_rtx (mode
);
8261 if (GET_CODE (src
) == MEM
)
8262 src
= force_reg (mode
, src
);
8265 /* Emit the instruction. */
8267 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
8268 if (reload_in_progress
|| code
== NOT
)
8270 /* Reload doesn't know about the flags register, and doesn't know that
8271 it doesn't want to clobber it. */
8278 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
8279 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
8282 /* Fix up the destination if needed. */
8283 if (dst
!= operands
[0])
8284 emit_move_insn (operands
[0], dst
);
8287 /* Return TRUE or FALSE depending on whether the unary operator meets the
8288 appropriate constraints. */
8291 ix86_unary_operator_ok (code
, mode
, operands
)
8292 enum rtx_code code ATTRIBUTE_UNUSED
;
8293 enum machine_mode mode ATTRIBUTE_UNUSED
;
8294 rtx operands
[2] ATTRIBUTE_UNUSED
;
8296 /* If one of operands is memory, source and destination must match. */
8297 if ((GET_CODE (operands
[0]) == MEM
8298 || GET_CODE (operands
[1]) == MEM
)
8299 && ! rtx_equal_p (operands
[0], operands
[1]))
8304 /* Return TRUE or FALSE depending on whether the first SET in INSN
8305 has source and destination with matching CC modes, and that the
8306 CC mode is at least as constrained as REQ_MODE. */
8309 ix86_match_ccmode (insn
, req_mode
)
8311 enum machine_mode req_mode
;
8314 enum machine_mode set_mode
;
8316 set
= PATTERN (insn
);
8317 if (GET_CODE (set
) == PARALLEL
)
8318 set
= XVECEXP (set
, 0, 0);
8319 if (GET_CODE (set
) != SET
)
8321 if (GET_CODE (SET_SRC (set
)) != COMPARE
)
8324 set_mode
= GET_MODE (SET_DEST (set
));
8328 if (req_mode
!= CCNOmode
8329 && (req_mode
!= CCmode
8330 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
8334 if (req_mode
== CCGCmode
)
8338 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
8342 if (req_mode
== CCZmode
)
8352 return (GET_MODE (SET_SRC (set
)) == set_mode
);
8355 /* Generate insn patterns to do an integer compare of OPERANDS. */
8358 ix86_expand_int_compare (code
, op0
, op1
)
8362 enum machine_mode cmpmode
;
8365 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
8366 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
8368 /* This is very simple, but making the interface the same as in the
8369 FP case makes the rest of the code easier. */
8370 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
8371 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
8373 /* Return the test that should be put into the flags user, i.e.
8374 the bcc, scc, or cmov instruction. */
8375 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
8378 /* Figure out whether to use ordered or unordered fp comparisons.
8379 Return the appropriate mode to use. */
8382 ix86_fp_compare_mode (code
)
8383 enum rtx_code code ATTRIBUTE_UNUSED
;
8385 /* ??? In order to make all comparisons reversible, we do all comparisons
8386 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8387 all forms trapping and nontrapping comparisons, we can make inequality
8388 comparisons trapping again, since it results in better code when using
8389 FCOM based compares. */
8390 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
8394 ix86_cc_mode (code
, op0
, op1
)
8398 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8399 return ix86_fp_compare_mode (code
);
8402 /* Only zero flag is needed. */
8404 case NE
: /* ZF!=0 */
8406 /* Codes needing carry flag. */
8407 case GEU
: /* CF=0 */
8408 case GTU
: /* CF=0 & ZF=0 */
8409 case LTU
: /* CF=1 */
8410 case LEU
: /* CF=1 | ZF=1 */
8412 /* Codes possibly doable only with sign flag when
8413 comparing against zero. */
8414 case GE
: /* SF=OF or SF=0 */
8415 case LT
: /* SF<>OF or SF=1 */
8416 if (op1
== const0_rtx
)
8419 /* For other cases Carry flag is not required. */
8421 /* Codes doable only with sign flag when comparing
8422 against zero, but we miss jump instruction for it
8423 so we need to use relational tests agains overflow
8424 that thus needs to be zero. */
8425 case GT
: /* ZF=0 & SF=OF */
8426 case LE
: /* ZF=1 | SF<>OF */
8427 if (op1
== const0_rtx
)
8431 /* strcmp pattern do (use flags) and combine may ask us for proper
8440 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8443 ix86_use_fcomi_compare (code
)
8444 enum rtx_code code ATTRIBUTE_UNUSED
;
8446 enum rtx_code swapped_code
= swap_condition (code
);
8447 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
8448 || (ix86_fp_comparison_cost (swapped_code
)
8449 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
8452 /* Swap, force into registers, or otherwise massage the two operands
8453 to a fp comparison. The operands are updated in place; the new
8454 comparsion code is returned. */
8456 static enum rtx_code
8457 ix86_prepare_fp_compare_args (code
, pop0
, pop1
)
8461 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
8462 rtx op0
= *pop0
, op1
= *pop1
;
8463 enum machine_mode op_mode
= GET_MODE (op0
);
8464 int is_sse
= SSE_REG_P (op0
) | SSE_REG_P (op1
);
8466 /* All of the unordered compare instructions only work on registers.
8467 The same is true of the XFmode compare instructions. The same is
8468 true of the fcomi compare instructions. */
8471 && (fpcmp_mode
== CCFPUmode
8472 || op_mode
== XFmode
8473 || op_mode
== TFmode
8474 || ix86_use_fcomi_compare (code
)))
8476 op0
= force_reg (op_mode
, op0
);
8477 op1
= force_reg (op_mode
, op1
);
8481 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8482 things around if they appear profitable, otherwise force op0
8485 if (standard_80387_constant_p (op0
) == 0
8486 || (GET_CODE (op0
) == MEM
8487 && ! (standard_80387_constant_p (op1
) == 0
8488 || GET_CODE (op1
) == MEM
)))
8491 tmp
= op0
, op0
= op1
, op1
= tmp
;
8492 code
= swap_condition (code
);
8495 if (GET_CODE (op0
) != REG
)
8496 op0
= force_reg (op_mode
, op0
);
8498 if (CONSTANT_P (op1
))
8500 if (standard_80387_constant_p (op1
))
8501 op1
= force_reg (op_mode
, op1
);
8503 op1
= validize_mem (force_const_mem (op_mode
, op1
));
8507 /* Try to rearrange the comparison to make it cheaper. */
8508 if (ix86_fp_comparison_cost (code
)
8509 > ix86_fp_comparison_cost (swap_condition (code
))
8510 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
8513 tmp
= op0
, op0
= op1
, op1
= tmp
;
8514 code
= swap_condition (code
);
8515 if (GET_CODE (op0
) != REG
)
8516 op0
= force_reg (op_mode
, op0
);
8524 /* Convert comparison codes we use to represent FP comparison to integer
8525 code that will result in proper branch. Return UNKNOWN if no such code
8527 static enum rtx_code
8528 ix86_fp_compare_code_to_integer (code
)
8558 /* Split comparison code CODE into comparisons we can do using branch
8559 instructions. BYPASS_CODE is comparison code for branch that will
8560 branch around FIRST_CODE and SECOND_CODE. If some of branches
8561 is not required, set value to NIL.
8562 We never require more than two branches. */
8564 ix86_fp_comparison_codes (code
, bypass_code
, first_code
, second_code
)
8565 enum rtx_code code
, *bypass_code
, *first_code
, *second_code
;
8571 /* The fcomi comparison sets flags as follows:
8581 case GT
: /* GTU - CF=0 & ZF=0 */
8582 case GE
: /* GEU - CF=0 */
8583 case ORDERED
: /* PF=0 */
8584 case UNORDERED
: /* PF=1 */
8585 case UNEQ
: /* EQ - ZF=1 */
8586 case UNLT
: /* LTU - CF=1 */
8587 case UNLE
: /* LEU - CF=1 | ZF=1 */
8588 case LTGT
: /* EQ - ZF=0 */
8590 case LT
: /* LTU - CF=1 - fails on unordered */
8592 *bypass_code
= UNORDERED
;
8594 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
8596 *bypass_code
= UNORDERED
;
8598 case EQ
: /* EQ - ZF=1 - fails on unordered */
8600 *bypass_code
= UNORDERED
;
8602 case NE
: /* NE - ZF=0 - fails on unordered */
8604 *second_code
= UNORDERED
;
8606 case UNGE
: /* GEU - CF=0 - fails on unordered */
8608 *second_code
= UNORDERED
;
8610 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
8612 *second_code
= UNORDERED
;
8617 if (!TARGET_IEEE_FP
)
8624 /* Return cost of comparison done fcom + arithmetics operations on AX.
8625 All following functions do use number of instructions as an cost metrics.
8626 In future this should be tweaked to compute bytes for optimize_size and
8627 take into account performance of various instructions on various CPUs. */
8629 ix86_fp_comparison_arithmetics_cost (code
)
8632 if (!TARGET_IEEE_FP
)
8634 /* The cost of code output by ix86_expand_fp_compare. */
8662 /* Return cost of comparison done using fcomi operation.
8663 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8665 ix86_fp_comparison_fcomi_cost (code
)
8668 enum rtx_code bypass_code
, first_code
, second_code
;
8669 /* Return arbitarily high cost when instruction is not supported - this
8670 prevents gcc from using it. */
8673 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8674 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 2;
8677 /* Return cost of comparison done using sahf operation.
8678 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8680 ix86_fp_comparison_sahf_cost (code
)
8683 enum rtx_code bypass_code
, first_code
, second_code
;
8684 /* Return arbitarily high cost when instruction is not preferred - this
8685 avoids gcc from using it. */
8686 if (!TARGET_USE_SAHF
&& !optimize_size
)
8688 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8689 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 3;
8692 /* Compute cost of the comparison done using any method.
8693 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8695 ix86_fp_comparison_cost (code
)
8698 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
8701 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
8702 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
8704 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
8705 if (min
> sahf_cost
)
8707 if (min
> fcomi_cost
)
8712 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8715 ix86_expand_fp_compare (code
, op0
, op1
, scratch
, second_test
, bypass_test
)
8717 rtx op0
, op1
, scratch
;
8721 enum machine_mode fpcmp_mode
, intcmp_mode
;
8723 int cost
= ix86_fp_comparison_cost (code
);
8724 enum rtx_code bypass_code
, first_code
, second_code
;
8726 fpcmp_mode
= ix86_fp_compare_mode (code
);
8727 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
8730 *second_test
= NULL_RTX
;
8732 *bypass_test
= NULL_RTX
;
8734 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8736 /* Do fcomi/sahf based test when profitable. */
8737 if ((bypass_code
== NIL
|| bypass_test
)
8738 && (second_code
== NIL
|| second_test
)
8739 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
8743 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8744 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
8750 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8751 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8753 scratch
= gen_reg_rtx (HImode
);
8754 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8755 emit_insn (gen_x86_sahf_1 (scratch
));
8758 /* The FP codes work out to act like unsigned. */
8759 intcmp_mode
= fpcmp_mode
;
8761 if (bypass_code
!= NIL
)
8762 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
8763 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8765 if (second_code
!= NIL
)
8766 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
8767 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8772 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8773 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8774 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8776 scratch
= gen_reg_rtx (HImode
);
8777 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8779 /* In the unordered case, we have to check C2 for NaN's, which
8780 doesn't happen to work out to anything nice combination-wise.
8781 So do some bit twiddling on the value we've got in AH to come
8782 up with an appropriate set of condition codes. */
8784 intcmp_mode
= CCNOmode
;
8789 if (code
== GT
|| !TARGET_IEEE_FP
)
8791 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8796 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8797 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8798 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
8799 intcmp_mode
= CCmode
;
8805 if (code
== LT
&& TARGET_IEEE_FP
)
8807 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8808 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
8809 intcmp_mode
= CCmode
;
8814 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
8820 if (code
== GE
|| !TARGET_IEEE_FP
)
8822 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
8827 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8828 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8835 if (code
== LE
&& TARGET_IEEE_FP
)
8837 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8838 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8839 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8840 intcmp_mode
= CCmode
;
8845 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8851 if (code
== EQ
&& TARGET_IEEE_FP
)
8853 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8854 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8855 intcmp_mode
= CCmode
;
8860 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8867 if (code
== NE
&& TARGET_IEEE_FP
)
8869 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8870 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8876 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8882 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8886 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8895 /* Return the test that should be put into the flags user, i.e.
8896 the bcc, scc, or cmov instruction. */
8897 return gen_rtx_fmt_ee (code
, VOIDmode
,
8898 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8903 ix86_expand_compare (code
, second_test
, bypass_test
)
8905 rtx
*second_test
, *bypass_test
;
8908 op0
= ix86_compare_op0
;
8909 op1
= ix86_compare_op1
;
8912 *second_test
= NULL_RTX
;
8914 *bypass_test
= NULL_RTX
;
8916 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8917 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
8918 second_test
, bypass_test
);
8920 ret
= ix86_expand_int_compare (code
, op0
, op1
);
8925 /* Return true if the CODE will result in nontrivial jump sequence. */
8927 ix86_fp_jump_nontrivial_p (code
)
8930 enum rtx_code bypass_code
, first_code
, second_code
;
8933 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8934 return bypass_code
!= NIL
|| second_code
!= NIL
;
8938 ix86_expand_branch (code
, label
)
8944 switch (GET_MODE (ix86_compare_op0
))
8950 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
8951 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8952 gen_rtx_LABEL_REF (VOIDmode
, label
),
8954 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
8964 enum rtx_code bypass_code
, first_code
, second_code
;
8966 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
8969 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8971 /* Check whether we will use the natural sequence with one jump. If
8972 so, we can expand jump early. Otherwise delay expansion by
8973 creating compound insn to not confuse optimizers. */
8974 if (bypass_code
== NIL
&& second_code
== NIL
8977 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
8978 gen_rtx_LABEL_REF (VOIDmode
, label
),
8983 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
8984 ix86_compare_op0
, ix86_compare_op1
);
8985 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8986 gen_rtx_LABEL_REF (VOIDmode
, label
),
8988 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
8990 use_fcomi
= ix86_use_fcomi_compare (code
);
8991 vec
= rtvec_alloc (3 + !use_fcomi
);
8992 RTVEC_ELT (vec
, 0) = tmp
;
8994 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
8996 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
8999 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
9001 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
9009 /* Expand DImode branch into multiple compare+branch. */
9011 rtx lo
[2], hi
[2], label2
;
9012 enum rtx_code code1
, code2
, code3
;
9014 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
9016 tmp
= ix86_compare_op0
;
9017 ix86_compare_op0
= ix86_compare_op1
;
9018 ix86_compare_op1
= tmp
;
9019 code
= swap_condition (code
);
9021 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
9022 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
9024 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9025 avoid two branches. This costs one extra insn, so disable when
9026 optimizing for size. */
9028 if ((code
== EQ
|| code
== NE
)
9030 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
9035 if (hi
[1] != const0_rtx
)
9036 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
9037 NULL_RTX
, 0, OPTAB_WIDEN
);
9040 if (lo
[1] != const0_rtx
)
9041 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
9042 NULL_RTX
, 0, OPTAB_WIDEN
);
9044 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
9045 NULL_RTX
, 0, OPTAB_WIDEN
);
9047 ix86_compare_op0
= tmp
;
9048 ix86_compare_op1
= const0_rtx
;
9049 ix86_expand_branch (code
, label
);
9053 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9054 op1 is a constant and the low word is zero, then we can just
9055 examine the high word. */
9057 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
9060 case LT
: case LTU
: case GE
: case GEU
:
9061 ix86_compare_op0
= hi
[0];
9062 ix86_compare_op1
= hi
[1];
9063 ix86_expand_branch (code
, label
);
9069 /* Otherwise, we need two or three jumps. */
9071 label2
= gen_label_rtx ();
9074 code2
= swap_condition (code
);
9075 code3
= unsigned_condition (code
);
9079 case LT
: case GT
: case LTU
: case GTU
:
9082 case LE
: code1
= LT
; code2
= GT
; break;
9083 case GE
: code1
= GT
; code2
= LT
; break;
9084 case LEU
: code1
= LTU
; code2
= GTU
; break;
9085 case GEU
: code1
= GTU
; code2
= LTU
; break;
9087 case EQ
: code1
= NIL
; code2
= NE
; break;
9088 case NE
: code2
= NIL
; break;
9096 * if (hi(a) < hi(b)) goto true;
9097 * if (hi(a) > hi(b)) goto false;
9098 * if (lo(a) < lo(b)) goto true;
9102 ix86_compare_op0
= hi
[0];
9103 ix86_compare_op1
= hi
[1];
9106 ix86_expand_branch (code1
, label
);
9108 ix86_expand_branch (code2
, label2
);
9110 ix86_compare_op0
= lo
[0];
9111 ix86_compare_op1
= lo
[1];
9112 ix86_expand_branch (code3
, label
);
9115 emit_label (label2
);
9124 /* Split branch based on floating point condition. */
9126 ix86_split_fp_branch (code
, op1
, op2
, target1
, target2
, tmp
)
9128 rtx op1
, op2
, target1
, target2
, tmp
;
9131 rtx label
= NULL_RTX
;
9133 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
9136 if (target2
!= pc_rtx
)
9139 code
= reverse_condition_maybe_unordered (code
);
9144 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
9145 tmp
, &second
, &bypass
);
9147 if (split_branch_probability
>= 0)
9149 /* Distribute the probabilities across the jumps.
9150 Assume the BYPASS and SECOND to be always test
9152 probability
= split_branch_probability
;
9154 /* Value of 1 is low enough to make no need for probability
9155 to be updated. Later we may run some experiments and see
9156 if unordered values are more frequent in practice. */
9158 bypass_probability
= 1;
9160 second_probability
= 1;
9162 if (bypass
!= NULL_RTX
)
9164 label
= gen_label_rtx ();
9165 i
= emit_jump_insn (gen_rtx_SET
9167 gen_rtx_IF_THEN_ELSE (VOIDmode
,
9169 gen_rtx_LABEL_REF (VOIDmode
,
9172 if (bypass_probability
>= 0)
9174 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9175 GEN_INT (bypass_probability
),
9178 i
= emit_jump_insn (gen_rtx_SET
9180 gen_rtx_IF_THEN_ELSE (VOIDmode
,
9181 condition
, target1
, target2
)));
9182 if (probability
>= 0)
9184 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9185 GEN_INT (probability
),
9187 if (second
!= NULL_RTX
)
9189 i
= emit_jump_insn (gen_rtx_SET
9191 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
9193 if (second_probability
>= 0)
9195 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9196 GEN_INT (second_probability
),
9199 if (label
!= NULL_RTX
)
9204 ix86_expand_setcc (code
, dest
)
9208 rtx ret
, tmp
, tmpreg
;
9209 rtx second_test
, bypass_test
;
9211 if (GET_MODE (ix86_compare_op0
) == DImode
9213 return 0; /* FAIL */
9215 if (GET_MODE (dest
) != QImode
)
9218 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9219 PUT_MODE (ret
, QImode
);
9224 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
9225 if (bypass_test
|| second_test
)
9227 rtx test
= second_test
;
9229 rtx tmp2
= gen_reg_rtx (QImode
);
9236 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
9238 PUT_MODE (test
, QImode
);
9239 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
9242 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
9244 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
9247 return 1; /* DONE */
9251 ix86_expand_int_movcc (operands
)
9254 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
9255 rtx compare_seq
, compare_op
;
9256 rtx second_test
, bypass_test
;
9257 enum machine_mode mode
= GET_MODE (operands
[0]);
9259 /* When the compare code is not LTU or GEU, we can not use sbbl case.
9260 In case comparsion is done with immediate, we can convert it to LTU or
9261 GEU by altering the integer. */
9263 if ((code
== LEU
|| code
== GTU
)
9264 && GET_CODE (ix86_compare_op1
) == CONST_INT
9266 && INTVAL (ix86_compare_op1
) != -1
9267 /* For x86-64, the immediate field in the instruction is 32-bit
9268 signed, so we can't increment a DImode value above 0x7fffffff. */
9270 || GET_MODE (ix86_compare_op0
) != DImode
9271 || INTVAL (ix86_compare_op1
) != 0x7fffffff)
9272 && GET_CODE (operands
[2]) == CONST_INT
9273 && GET_CODE (operands
[3]) == CONST_INT
)
9279 ix86_compare_op1
= gen_int_mode (INTVAL (ix86_compare_op1
) + 1,
9280 GET_MODE (ix86_compare_op0
));
9284 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9285 compare_seq
= get_insns ();
9288 compare_code
= GET_CODE (compare_op
);
9290 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9291 HImode insns, we'd be swallowed in word prefix ops. */
9294 && (mode
!= DImode
|| TARGET_64BIT
)
9295 && GET_CODE (operands
[2]) == CONST_INT
9296 && GET_CODE (operands
[3]) == CONST_INT
)
9298 rtx out
= operands
[0];
9299 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
9300 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
9303 if ((compare_code
== LTU
|| compare_code
== GEU
)
9304 && !second_test
&& !bypass_test
)
9306 /* Detect overlap between destination and compare sources. */
9309 /* To simplify rest of code, restrict to the GEU case. */
9310 if (compare_code
== LTU
)
9312 HOST_WIDE_INT tmp
= ct
;
9315 compare_code
= reverse_condition (compare_code
);
9316 code
= reverse_condition (code
);
9320 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
9321 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
9322 tmp
= gen_reg_rtx (mode
);
9324 emit_insn (compare_seq
);
9326 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
));
9328 emit_insn (gen_x86_movsicc_0_m1 (tmp
));
9340 tmp
= expand_simple_binop (mode
, PLUS
,
9342 tmp
, 1, OPTAB_DIRECT
);
9353 tmp
= expand_simple_binop (mode
, IOR
,
9355 tmp
, 1, OPTAB_DIRECT
);
9357 else if (diff
== -1 && ct
)
9367 tmp
= expand_simple_unop (mode
, NOT
, tmp
, tmp
, 1);
9369 tmp
= expand_simple_binop (mode
, PLUS
,
9371 tmp
, 1, OPTAB_DIRECT
);
9379 * andl cf - ct, dest
9389 tmp
= expand_simple_unop (mode
, NOT
, tmp
, tmp
, 1);
9392 tmp
= expand_simple_binop (mode
, AND
,
9394 gen_int_mode (cf
- ct
, mode
),
9395 tmp
, 1, OPTAB_DIRECT
);
9397 tmp
= expand_simple_binop (mode
, PLUS
,
9399 tmp
, 1, OPTAB_DIRECT
);
9403 emit_move_insn (out
, tmp
);
9405 return 1; /* DONE */
9412 tmp
= ct
, ct
= cf
, cf
= tmp
;
9414 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
9416 /* We may be reversing unordered compare to normal compare, that
9417 is not valid in general (we may convert non-trapping condition
9418 to trapping one), however on i386 we currently emit all
9419 comparisons unordered. */
9420 compare_code
= reverse_condition_maybe_unordered (compare_code
);
9421 code
= reverse_condition_maybe_unordered (code
);
9425 compare_code
= reverse_condition (compare_code
);
9426 code
= reverse_condition (code
);
9431 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
9432 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
9434 if (ix86_compare_op1
== const0_rtx
9435 && (code
== LT
|| code
== GE
))
9436 compare_code
= code
;
9437 else if (ix86_compare_op1
== constm1_rtx
)
9441 else if (code
== GT
)
9446 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9447 if (compare_code
!= NIL
9448 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
9449 && (cf
== -1 || ct
== -1))
9451 /* If lea code below could be used, only optimize
9452 if it results in a 2 insn sequence. */
9454 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
9455 || diff
== 3 || diff
== 5 || diff
== 9)
9456 || (compare_code
== LT
&& ct
== -1)
9457 || (compare_code
== GE
&& cf
== -1))
9460 * notl op1 (if necessary)
9468 code
= reverse_condition (code
);
9471 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9472 ix86_compare_op1
, VOIDmode
, 0, -1);
9474 out
= expand_simple_binop (mode
, IOR
,
9476 out
, 1, OPTAB_DIRECT
);
9477 if (out
!= operands
[0])
9478 emit_move_insn (operands
[0], out
);
9480 return 1; /* DONE */
9484 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
9485 || diff
== 3 || diff
== 5 || diff
== 9)
9486 && (mode
!= DImode
|| x86_64_sign_extended_value (GEN_INT (cf
))))
9492 * lea cf(dest*(ct-cf)),dest
9496 * This also catches the degenerate setcc-only case.
9502 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9503 ix86_compare_op1
, VOIDmode
, 0, 1);
9506 /* On x86_64 the lea instruction operates on Pmode, so we need
9507 to get arithmetics done in proper mode to match. */
9509 tmp
= copy_rtx (out
);
9513 out1
= copy_rtx (out
);
9514 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
9518 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
9524 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
9528 && (GET_CODE (tmp
) != SUBREG
|| SUBREG_REG (tmp
) != out
))
9531 out
= force_operand (tmp
, copy_rtx (out
));
9533 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
9535 if (out
!= operands
[0])
9536 emit_move_insn (operands
[0], copy_rtx (out
));
9538 return 1; /* DONE */
9542 * General case: Jumpful:
9543 * xorl dest,dest cmpl op1, op2
9544 * cmpl op1, op2 movl ct, dest
9546 * decl dest movl cf, dest
9547 * andl (cf-ct),dest 1:
9552 * This is reasonably steep, but branch mispredict costs are
9553 * high on modern cpus, so consider failing only if optimizing
9556 * %%% Parameterize branch_cost on the tuning architecture, then
9557 * use that. The 80386 couldn't care less about mispredicts.
9560 if (!optimize_size
&& !TARGET_CMOVE
)
9566 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
9567 /* We may be reversing unordered compare to normal compare,
9568 that is not valid in general (we may convert non-trapping
9569 condition to trapping one), however on i386 we currently
9570 emit all comparisons unordered. */
9571 code
= reverse_condition_maybe_unordered (code
);
9574 code
= reverse_condition (code
);
9575 if (compare_code
!= NIL
)
9576 compare_code
= reverse_condition (compare_code
);
9580 if (compare_code
!= NIL
)
9582 /* notl op1 (if needed)
9587 For x < 0 (resp. x <= -1) there will be no notl,
9588 so if possible swap the constants to get rid of the
9590 True/false will be -1/0 while code below (store flag
9591 followed by decrement) is 0/-1, so the constants need
9592 to be exchanged once more. */
9594 if (compare_code
== GE
|| !cf
)
9596 code
= reverse_condition (code
);
9601 HOST_WIDE_INT tmp
= cf
;
9606 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9607 ix86_compare_op1
, VOIDmode
, 0, -1);
9611 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9612 ix86_compare_op1
, VOIDmode
, 0, 1);
9614 out
= expand_simple_binop (mode
, PLUS
, out
, constm1_rtx
,
9615 out
, 1, OPTAB_DIRECT
);
9618 out
= expand_simple_binop (mode
, AND
, out
,
9619 gen_int_mode (cf
- ct
, mode
),
9620 out
, 1, OPTAB_DIRECT
);
9622 out
= expand_simple_binop (mode
, PLUS
, out
, GEN_INT (ct
),
9623 out
, 1, OPTAB_DIRECT
);
9624 if (out
!= operands
[0])
9625 emit_move_insn (operands
[0], out
);
9627 return 1; /* DONE */
9633 /* Try a few things more with specific constants and a variable. */
9636 rtx var
, orig_out
, out
, tmp
;
9639 return 0; /* FAIL */
9641 /* If one of the two operands is an interesting constant, load a
9642 constant with the above and mask it in with a logical operation. */
9644 if (GET_CODE (operands
[2]) == CONST_INT
)
9647 if (INTVAL (operands
[2]) == 0)
9648 operands
[3] = constm1_rtx
, op
= and_optab
;
9649 else if (INTVAL (operands
[2]) == -1)
9650 operands
[3] = const0_rtx
, op
= ior_optab
;
9652 return 0; /* FAIL */
9654 else if (GET_CODE (operands
[3]) == CONST_INT
)
9657 if (INTVAL (operands
[3]) == 0)
9658 operands
[2] = constm1_rtx
, op
= and_optab
;
9659 else if (INTVAL (operands
[3]) == -1)
9660 operands
[2] = const0_rtx
, op
= ior_optab
;
9662 return 0; /* FAIL */
9665 return 0; /* FAIL */
9667 orig_out
= operands
[0];
9668 tmp
= gen_reg_rtx (mode
);
9671 /* Recurse to get the constant loaded. */
9672 if (ix86_expand_int_movcc (operands
) == 0)
9673 return 0; /* FAIL */
9675 /* Mask in the interesting variable. */
9676 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
9678 if (out
!= orig_out
)
9679 emit_move_insn (orig_out
, out
);
9681 return 1; /* DONE */
9685 * For comparison with above,
9695 if (! nonimmediate_operand (operands
[2], mode
))
9696 operands
[2] = force_reg (mode
, operands
[2]);
9697 if (! nonimmediate_operand (operands
[3], mode
))
9698 operands
[3] = force_reg (mode
, operands
[3]);
9700 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
9702 rtx tmp
= gen_reg_rtx (mode
);
9703 emit_move_insn (tmp
, operands
[3]);
9706 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
9708 rtx tmp
= gen_reg_rtx (mode
);
9709 emit_move_insn (tmp
, operands
[2]);
9712 if (! register_operand (operands
[2], VOIDmode
)
9713 && ! register_operand (operands
[3], VOIDmode
))
9714 operands
[2] = force_reg (mode
, operands
[2]);
9716 emit_insn (compare_seq
);
9717 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9718 gen_rtx_IF_THEN_ELSE (mode
,
9719 compare_op
, operands
[2],
9722 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9723 gen_rtx_IF_THEN_ELSE (mode
,
9728 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9729 gen_rtx_IF_THEN_ELSE (mode
,
9734 return 1; /* DONE */
9738 ix86_expand_fp_movcc (operands
)
9743 rtx compare_op
, second_test
, bypass_test
;
9745 /* For SF/DFmode conditional moves based on comparisons
9746 in same mode, we may want to use SSE min/max instructions. */
9747 if (((TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == SFmode
)
9748 || (TARGET_SSE2
&& TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == DFmode
))
9749 && GET_MODE (ix86_compare_op0
) == GET_MODE (operands
[0])
9750 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9752 || (GET_CODE (operands
[1]) != LTGT
&& GET_CODE (operands
[1]) != UNEQ
))
9753 /* We may be called from the post-reload splitter. */
9754 && (!REG_P (operands
[0])
9755 || SSE_REG_P (operands
[0])
9756 || REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
))
9758 rtx op0
= ix86_compare_op0
, op1
= ix86_compare_op1
;
9759 code
= GET_CODE (operands
[1]);
9761 /* See if we have (cross) match between comparison operands and
9762 conditional move operands. */
9763 if (rtx_equal_p (operands
[2], op1
))
9768 code
= reverse_condition_maybe_unordered (code
);
9770 if (rtx_equal_p (operands
[2], op0
) && rtx_equal_p (operands
[3], op1
))
9772 /* Check for min operation. */
9775 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
9776 if (memory_operand (op0
, VOIDmode
))
9777 op0
= force_reg (GET_MODE (operands
[0]), op0
);
9778 if (GET_MODE (operands
[0]) == SFmode
)
9779 emit_insn (gen_minsf3 (operands
[0], op0
, op1
));
9781 emit_insn (gen_mindf3 (operands
[0], op0
, op1
));
9784 /* Check for max operation. */
9787 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
9788 if (memory_operand (op0
, VOIDmode
))
9789 op0
= force_reg (GET_MODE (operands
[0]), op0
);
9790 if (GET_MODE (operands
[0]) == SFmode
)
9791 emit_insn (gen_maxsf3 (operands
[0], op0
, op1
));
9793 emit_insn (gen_maxdf3 (operands
[0], op0
, op1
));
9797 /* Manage condition to be sse_comparison_operator. In case we are
9798 in non-ieee mode, try to canonicalize the destination operand
9799 to be first in the comparison - this helps reload to avoid extra
9801 if (!sse_comparison_operator (operands
[1], VOIDmode
)
9802 || (rtx_equal_p (operands
[0], ix86_compare_op1
) && !TARGET_IEEE_FP
))
9804 rtx tmp
= ix86_compare_op0
;
9805 ix86_compare_op0
= ix86_compare_op1
;
9806 ix86_compare_op1
= tmp
;
9807 operands
[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands
[1])),
9808 VOIDmode
, ix86_compare_op0
,
9811 /* Similary try to manage result to be first operand of conditional
9812 move. We also don't support the NE comparison on SSE, so try to
9814 if ((rtx_equal_p (operands
[0], operands
[3])
9815 && (!TARGET_IEEE_FP
|| GET_CODE (operands
[1]) != EQ
))
9816 || (GET_CODE (operands
[1]) == NE
&& TARGET_IEEE_FP
))
9818 rtx tmp
= operands
[2];
9819 operands
[2] = operands
[3];
9821 operands
[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9822 (GET_CODE (operands
[1])),
9823 VOIDmode
, ix86_compare_op0
,
9826 if (GET_MODE (operands
[0]) == SFmode
)
9827 emit_insn (gen_sse_movsfcc (operands
[0], operands
[1],
9828 operands
[2], operands
[3],
9829 ix86_compare_op0
, ix86_compare_op1
));
9831 emit_insn (gen_sse_movdfcc (operands
[0], operands
[1],
9832 operands
[2], operands
[3],
9833 ix86_compare_op0
, ix86_compare_op1
));
9837 /* The floating point conditional move instructions don't directly
9838 support conditions resulting from a signed integer comparison. */
9840 code
= GET_CODE (operands
[1]);
9841 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9843 /* The floating point conditional move instructions don't directly
9844 support signed integer comparisons. */
9846 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
9848 if (second_test
!= NULL
|| bypass_test
!= NULL
)
9850 tmp
= gen_reg_rtx (QImode
);
9851 ix86_expand_setcc (code
, tmp
);
9853 ix86_compare_op0
= tmp
;
9854 ix86_compare_op1
= const0_rtx
;
9855 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9857 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
9859 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
9860 emit_move_insn (tmp
, operands
[3]);
9863 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
9865 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
9866 emit_move_insn (tmp
, operands
[2]);
9870 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9871 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9876 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9877 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9882 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9883 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9891 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9892 works for floating pointer parameters and nonoffsetable memories.
9893 For pushes, it returns just stack offsets; the values will be saved
9894 in the right order. Maximally three parts are generated. */
9897 ix86_split_to_parts (operand
, parts
, mode
)
9900 enum machine_mode mode
;
9905 size
= mode
== TFmode
? 3 : (GET_MODE_SIZE (mode
) / 4);
9907 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
9909 if (GET_CODE (operand
) == REG
&& MMX_REGNO_P (REGNO (operand
)))
9911 if (size
< 2 || size
> 3)
9914 /* Optimize constant pool reference to immediates. This is used by fp
9915 moves, that force all constants to memory to allow combining. */
9916 if (GET_CODE (operand
) == MEM
&& RTX_UNCHANGING_P (operand
))
9918 rtx tmp
= maybe_get_pool_constant (operand
);
9923 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
9925 /* The only non-offsetable memories we handle are pushes. */
9926 if (! push_operand (operand
, VOIDmode
))
9929 operand
= copy_rtx (operand
);
9930 PUT_MODE (operand
, Pmode
);
9931 parts
[0] = parts
[1] = parts
[2] = operand
;
9933 else if (!TARGET_64BIT
)
9936 split_di (&operand
, 1, &parts
[0], &parts
[1]);
9939 if (REG_P (operand
))
9941 if (!reload_completed
)
9943 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
9944 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
9946 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
9948 else if (offsettable_memref_p (operand
))
9950 operand
= adjust_address (operand
, SImode
, 0);
9952 parts
[1] = adjust_address (operand
, SImode
, 4);
9954 parts
[2] = adjust_address (operand
, SImode
, 8);
9956 else if (GET_CODE (operand
) == CONST_DOUBLE
)
9961 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
9966 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
9967 parts
[2] = gen_int_mode (l
[2], SImode
);
9970 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
9975 parts
[1] = gen_int_mode (l
[1], SImode
);
9976 parts
[0] = gen_int_mode (l
[0], SImode
);
9985 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
9986 if (mode
== XFmode
|| mode
== TFmode
)
9988 if (REG_P (operand
))
9990 if (!reload_completed
)
9992 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
9993 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
9995 else if (offsettable_memref_p (operand
))
9997 operand
= adjust_address (operand
, DImode
, 0);
9999 parts
[1] = adjust_address (operand
, SImode
, 8);
10001 else if (GET_CODE (operand
) == CONST_DOUBLE
)
10006 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
10007 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
10008 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10009 if (HOST_BITS_PER_WIDE_INT
>= 64)
10012 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
10013 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
10016 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
10017 parts
[1] = gen_int_mode (l
[2], SImode
);
10027 /* Emit insns to perform a move or push of DI, DF, and XF values.
10028 Return false when normal moves are needed; true when all required
10029 insns have been emitted. Operands 2-4 contain the input values
10030 int the correct order; operands 5-7 contain the output values. */
10033 ix86_split_long_move (operands
)
10039 int collisions
= 0;
10040 enum machine_mode mode
= GET_MODE (operands
[0]);
10042 /* The DFmode expanders may ask us to move double.
10043 For 64bit target this is single move. By hiding the fact
10044 here we simplify i386.md splitters. */
10045 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
10047 /* Optimize constant pool reference to immediates. This is used by
10048 fp moves, that force all constants to memory to allow combining. */
10050 if (GET_CODE (operands
[1]) == MEM
10051 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
10052 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
10053 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
10054 if (push_operand (operands
[0], VOIDmode
))
10056 operands
[0] = copy_rtx (operands
[0]);
10057 PUT_MODE (operands
[0], Pmode
);
10060 operands
[0] = gen_lowpart (DImode
, operands
[0]);
10061 operands
[1] = gen_lowpart (DImode
, operands
[1]);
10062 emit_move_insn (operands
[0], operands
[1]);
10066 /* The only non-offsettable memory we handle is push. */
10067 if (push_operand (operands
[0], VOIDmode
))
10069 else if (GET_CODE (operands
[0]) == MEM
10070 && ! offsettable_memref_p (operands
[0]))
10073 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
10074 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
10076 /* When emitting push, take care for source operands on the stack. */
10077 if (push
&& GET_CODE (operands
[1]) == MEM
10078 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
10081 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
10082 XEXP (part
[1][2], 0));
10083 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
10084 XEXP (part
[1][1], 0));
10087 /* We need to do copy in the right order in case an address register
10088 of the source overlaps the destination. */
10089 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
10091 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
10093 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
10096 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
10099 /* Collision in the middle part can be handled by reordering. */
10100 if (collisions
== 1 && nparts
== 3
10101 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
10104 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
10105 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
10108 /* If there are more collisions, we can't handle it by reordering.
10109 Do an lea to the last part and use only one colliding move. */
10110 else if (collisions
> 1)
10116 base
= part
[0][nparts
- 1];
10118 /* Handle the case when the last part isn't valid for lea.
10119 Happens in 64-bit mode storing the 12-byte XFmode. */
10120 if (GET_MODE (base
) != Pmode
)
10121 base
= gen_rtx_REG (Pmode
, REGNO (base
));
10123 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
10124 part
[1][0] = replace_equiv_address (part
[1][0], base
);
10125 part
[1][1] = replace_equiv_address (part
[1][1],
10126 plus_constant (base
, UNITS_PER_WORD
));
10128 part
[1][2] = replace_equiv_address (part
[1][2],
10129 plus_constant (base
, 8));
10139 /* We use only first 12 bytes of TFmode value, but for pushing we
10140 are required to adjust stack as if we were pushing real 16byte
10142 if (mode
== TFmode
&& !TARGET_64BIT
)
10143 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
10145 emit_move_insn (part
[0][2], part
[1][2]);
10150 /* In 64bit mode we don't have 32bit push available. In case this is
10151 register, it is OK - we will just use larger counterpart. We also
10152 retype memory - these comes from attempt to avoid REX prefix on
10153 moving of second half of TFmode value. */
10154 if (GET_MODE (part
[1][1]) == SImode
)
10156 if (GET_CODE (part
[1][1]) == MEM
)
10157 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
10158 else if (REG_P (part
[1][1]))
10159 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
10162 if (GET_MODE (part
[1][0]) == SImode
)
10163 part
[1][0] = part
[1][1];
10166 emit_move_insn (part
[0][1], part
[1][1]);
10167 emit_move_insn (part
[0][0], part
[1][0]);
10171 /* Choose correct order to not overwrite the source before it is copied. */
10172 if ((REG_P (part
[0][0])
10173 && REG_P (part
[1][1])
10174 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
10176 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
10178 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
10182 operands
[2] = part
[0][2];
10183 operands
[3] = part
[0][1];
10184 operands
[4] = part
[0][0];
10185 operands
[5] = part
[1][2];
10186 operands
[6] = part
[1][1];
10187 operands
[7] = part
[1][0];
10191 operands
[2] = part
[0][1];
10192 operands
[3] = part
[0][0];
10193 operands
[5] = part
[1][1];
10194 operands
[6] = part
[1][0];
10201 operands
[2] = part
[0][0];
10202 operands
[3] = part
[0][1];
10203 operands
[4] = part
[0][2];
10204 operands
[5] = part
[1][0];
10205 operands
[6] = part
[1][1];
10206 operands
[7] = part
[1][2];
10210 operands
[2] = part
[0][0];
10211 operands
[3] = part
[0][1];
10212 operands
[5] = part
[1][0];
10213 operands
[6] = part
[1][1];
10216 emit_move_insn (operands
[2], operands
[5]);
10217 emit_move_insn (operands
[3], operands
[6]);
10219 emit_move_insn (operands
[4], operands
[7]);
10225 ix86_split_ashldi (operands
, scratch
)
10226 rtx
*operands
, scratch
;
10228 rtx low
[2], high
[2];
10231 if (GET_CODE (operands
[2]) == CONST_INT
)
10233 split_di (operands
, 2, low
, high
);
10234 count
= INTVAL (operands
[2]) & 63;
10238 emit_move_insn (high
[0], low
[1]);
10239 emit_move_insn (low
[0], const0_rtx
);
10242 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
10246 if (!rtx_equal_p (operands
[0], operands
[1]))
10247 emit_move_insn (operands
[0], operands
[1]);
10248 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
10249 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
10254 if (!rtx_equal_p (operands
[0], operands
[1]))
10255 emit_move_insn (operands
[0], operands
[1]);
10257 split_di (operands
, 1, low
, high
);
10259 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
10260 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
10262 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10264 if (! no_new_pseudos
)
10265 scratch
= force_reg (SImode
, const0_rtx
);
10267 emit_move_insn (scratch
, const0_rtx
);
10269 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
10273 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
10278 ix86_split_ashrdi (operands
, scratch
)
10279 rtx
*operands
, scratch
;
10281 rtx low
[2], high
[2];
10284 if (GET_CODE (operands
[2]) == CONST_INT
)
10286 split_di (operands
, 2, low
, high
);
10287 count
= INTVAL (operands
[2]) & 63;
10291 emit_move_insn (low
[0], high
[1]);
10293 if (! reload_completed
)
10294 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
10297 emit_move_insn (high
[0], low
[0]);
10298 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
10302 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
10306 if (!rtx_equal_p (operands
[0], operands
[1]))
10307 emit_move_insn (operands
[0], operands
[1]);
10308 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
10309 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
10314 if (!rtx_equal_p (operands
[0], operands
[1]))
10315 emit_move_insn (operands
[0], operands
[1]);
10317 split_di (operands
, 1, low
, high
);
10319 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
10320 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
10322 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10324 if (! no_new_pseudos
)
10325 scratch
= gen_reg_rtx (SImode
);
10326 emit_move_insn (scratch
, high
[0]);
10327 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
10328 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
10332 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
10337 ix86_split_lshrdi (operands
, scratch
)
10338 rtx
*operands
, scratch
;
10340 rtx low
[2], high
[2];
10343 if (GET_CODE (operands
[2]) == CONST_INT
)
10345 split_di (operands
, 2, low
, high
);
10346 count
= INTVAL (operands
[2]) & 63;
10350 emit_move_insn (low
[0], high
[1]);
10351 emit_move_insn (high
[0], const0_rtx
);
10354 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
10358 if (!rtx_equal_p (operands
[0], operands
[1]))
10359 emit_move_insn (operands
[0], operands
[1]);
10360 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
10361 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
10366 if (!rtx_equal_p (operands
[0], operands
[1]))
10367 emit_move_insn (operands
[0], operands
[1]);
10369 split_di (operands
, 1, low
, high
);
10371 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
10372 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
10374 /* Heh. By reversing the arguments, we can reuse this pattern. */
10375 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10377 if (! no_new_pseudos
)
10378 scratch
= force_reg (SImode
, const0_rtx
);
10380 emit_move_insn (scratch
, const0_rtx
);
10382 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
10386 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
10390 /* Helper function for the string operations below. Dest VARIABLE whether
10391 it is aligned to VALUE bytes. If true, jump to the label. */
10393 ix86_expand_aligntest (variable
, value
)
10397 rtx label
= gen_label_rtx ();
10398 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
10399 if (GET_MODE (variable
) == DImode
)
10400 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
10402 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
10403 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
10408 /* Adjust COUNTER by the VALUE. */
10410 ix86_adjust_counter (countreg
, value
)
10412 HOST_WIDE_INT value
;
10414 if (GET_MODE (countreg
) == DImode
)
10415 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
10417 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
10420 /* Zero extend possibly SImode EXP to Pmode register. */
10422 ix86_zero_extend_to_Pmode (exp
)
10426 if (GET_MODE (exp
) == VOIDmode
)
10427 return force_reg (Pmode
, exp
);
10428 if (GET_MODE (exp
) == Pmode
)
10429 return copy_to_mode_reg (Pmode
, exp
);
10430 r
= gen_reg_rtx (Pmode
);
10431 emit_insn (gen_zero_extendsidi2 (r
, exp
));
10435 /* Expand string move (memcpy) operation. Use i386 string operations when
10436 profitable. expand_clrstr contains similar code. */
10438 ix86_expand_movstr (dst
, src
, count_exp
, align_exp
)
10439 rtx dst
, src
, count_exp
, align_exp
;
10441 rtx srcreg
, destreg
, countreg
;
10442 enum machine_mode counter_mode
;
10443 HOST_WIDE_INT align
= 0;
10444 unsigned HOST_WIDE_INT count
= 0;
10449 if (GET_CODE (align_exp
) == CONST_INT
)
10450 align
= INTVAL (align_exp
);
10452 /* This simple hack avoids all inlining code and simplifies code below. */
10453 if (!TARGET_ALIGN_STRINGOPS
)
10456 if (GET_CODE (count_exp
) == CONST_INT
)
10457 count
= INTVAL (count_exp
);
10459 /* Figure out proper mode for counter. For 32bits it is always SImode,
10460 for 64bits use SImode when possible, otherwise DImode.
10461 Set count to number of bytes copied when known at compile time. */
10462 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
10463 || x86_64_zero_extended_value (count_exp
))
10464 counter_mode
= SImode
;
10466 counter_mode
= DImode
;
10468 if (counter_mode
!= SImode
&& counter_mode
!= DImode
)
10471 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
10472 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
10474 emit_insn (gen_cld ());
10476 /* When optimizing for size emit simple rep ; movsb instruction for
10477 counts not divisible by 4. */
10479 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
10481 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
10483 emit_insn (gen_rep_movqi_rex64 (destreg
, srcreg
, countreg
,
10484 destreg
, srcreg
, countreg
));
10486 emit_insn (gen_rep_movqi (destreg
, srcreg
, countreg
,
10487 destreg
, srcreg
, countreg
));
10490 /* For constant aligned (or small unaligned) copies use rep movsl
10491 followed by code copying the rest. For PentiumPro ensure 8 byte
10492 alignment to allow rep movsl acceleration. */
10494 else if (count
!= 0
10496 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
10497 || optimize_size
|| count
< (unsigned int) 64))
10499 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
10500 if (count
& ~(size
- 1))
10502 countreg
= copy_to_mode_reg (counter_mode
,
10503 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
10504 & (TARGET_64BIT
? -1 : 0x3fffffff)));
10505 countreg
= ix86_zero_extend_to_Pmode (countreg
);
10509 emit_insn (gen_rep_movsi_rex64 (destreg
, srcreg
, countreg
,
10510 destreg
, srcreg
, countreg
));
10512 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg
,
10513 destreg
, srcreg
, countreg
));
10516 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg
,
10517 destreg
, srcreg
, countreg
));
10519 if (size
== 8 && (count
& 0x04))
10520 emit_insn (gen_strmovsi (destreg
, srcreg
));
10522 emit_insn (gen_strmovhi (destreg
, srcreg
));
10524 emit_insn (gen_strmovqi (destreg
, srcreg
));
10526 /* The generic code based on the glibc implementation:
10527 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10528 allowing accelerated copying there)
10529 - copy the data using rep movsl
10530 - copy the rest. */
10535 int desired_alignment
= (TARGET_PENTIUMPRO
10536 && (count
== 0 || count
>= (unsigned int) 260)
10537 ? 8 : UNITS_PER_WORD
);
10539 /* In case we don't know anything about the alignment, default to
10540 library version, since it is usually equally fast and result in
10542 if (!TARGET_INLINE_ALL_STRINGOPS
&& align
< UNITS_PER_WORD
)
10548 if (TARGET_SINGLE_STRINGOP
)
10549 emit_insn (gen_cld ());
10551 countreg2
= gen_reg_rtx (Pmode
);
10552 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
10554 /* We don't use loops to align destination and to copy parts smaller
10555 than 4 bytes, because gcc is able to optimize such code better (in
10556 the case the destination or the count really is aligned, gcc is often
10557 able to predict the branches) and also it is friendlier to the
10558 hardware branch prediction.
10560 Using loops is beneficial for generic case, because we can
10561 handle small counts using the loops. Many CPUs (such as Athlon)
10562 have large REP prefix setup costs.
10564 This is quite costly. Maybe we can revisit this decision later or
10565 add some customizability to this code. */
10567 if (count
== 0 && align
< desired_alignment
)
10569 label
= gen_label_rtx ();
10570 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
10571 LEU
, 0, counter_mode
, 1, label
);
10575 rtx label
= ix86_expand_aligntest (destreg
, 1);
10576 emit_insn (gen_strmovqi (destreg
, srcreg
));
10577 ix86_adjust_counter (countreg
, 1);
10578 emit_label (label
);
10579 LABEL_NUSES (label
) = 1;
10583 rtx label
= ix86_expand_aligntest (destreg
, 2);
10584 emit_insn (gen_strmovhi (destreg
, srcreg
));
10585 ix86_adjust_counter (countreg
, 2);
10586 emit_label (label
);
10587 LABEL_NUSES (label
) = 1;
10589 if (align
<= 4 && desired_alignment
> 4)
10591 rtx label
= ix86_expand_aligntest (destreg
, 4);
10592 emit_insn (gen_strmovsi (destreg
, srcreg
));
10593 ix86_adjust_counter (countreg
, 4);
10594 emit_label (label
);
10595 LABEL_NUSES (label
) = 1;
10598 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
10600 emit_label (label
);
10601 LABEL_NUSES (label
) = 1;
10604 if (!TARGET_SINGLE_STRINGOP
)
10605 emit_insn (gen_cld ());
10608 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
10610 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg2
,
10611 destreg
, srcreg
, countreg2
));
10615 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
10616 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg2
,
10617 destreg
, srcreg
, countreg2
));
10622 emit_label (label
);
10623 LABEL_NUSES (label
) = 1;
10625 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
10626 emit_insn (gen_strmovsi (destreg
, srcreg
));
10627 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
10629 rtx label
= ix86_expand_aligntest (countreg
, 4);
10630 emit_insn (gen_strmovsi (destreg
, srcreg
));
10631 emit_label (label
);
10632 LABEL_NUSES (label
) = 1;
10634 if (align
> 2 && count
!= 0 && (count
& 2))
10635 emit_insn (gen_strmovhi (destreg
, srcreg
));
10636 if (align
<= 2 || count
== 0)
10638 rtx label
= ix86_expand_aligntest (countreg
, 2);
10639 emit_insn (gen_strmovhi (destreg
, srcreg
));
10640 emit_label (label
);
10641 LABEL_NUSES (label
) = 1;
10643 if (align
> 1 && count
!= 0 && (count
& 1))
10644 emit_insn (gen_strmovqi (destreg
, srcreg
));
10645 if (align
<= 1 || count
== 0)
10647 rtx label
= ix86_expand_aligntest (countreg
, 1);
10648 emit_insn (gen_strmovqi (destreg
, srcreg
));
10649 emit_label (label
);
10650 LABEL_NUSES (label
) = 1;
10654 insns
= get_insns ();
10657 ix86_set_move_mem_attrs (insns
, dst
, src
, destreg
, srcreg
);
10662 /* Expand string clear operation (bzero). Use i386 string operations when
10663 profitable. expand_movstr contains similar code. */
10665 ix86_expand_clrstr (src
, count_exp
, align_exp
)
10666 rtx src
, count_exp
, align_exp
;
10668 rtx destreg
, zeroreg
, countreg
;
10669 enum machine_mode counter_mode
;
10670 HOST_WIDE_INT align
= 0;
10671 unsigned HOST_WIDE_INT count
= 0;
10673 if (GET_CODE (align_exp
) == CONST_INT
)
10674 align
= INTVAL (align_exp
);
10676 /* This simple hack avoids all inlining code and simplifies code below. */
10677 if (!TARGET_ALIGN_STRINGOPS
)
10680 if (GET_CODE (count_exp
) == CONST_INT
)
10681 count
= INTVAL (count_exp
);
10682 /* Figure out proper mode for counter. For 32bits it is always SImode,
10683 for 64bits use SImode when possible, otherwise DImode.
10684 Set count to number of bytes copied when known at compile time. */
10685 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
10686 || x86_64_zero_extended_value (count_exp
))
10687 counter_mode
= SImode
;
10689 counter_mode
= DImode
;
10691 destreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
10693 emit_insn (gen_cld ());
10695 /* When optimizing for size emit simple rep ; movsb instruction for
10696 counts not divisible by 4. */
10698 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
10700 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
10701 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
10703 emit_insn (gen_rep_stosqi_rex64 (destreg
, countreg
, zeroreg
,
10704 destreg
, countreg
));
10706 emit_insn (gen_rep_stosqi (destreg
, countreg
, zeroreg
,
10707 destreg
, countreg
));
10709 else if (count
!= 0
10711 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
10712 || optimize_size
|| count
< (unsigned int) 64))
10714 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
10715 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
10716 if (count
& ~(size
- 1))
10718 countreg
= copy_to_mode_reg (counter_mode
,
10719 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
10720 & (TARGET_64BIT
? -1 : 0x3fffffff)));
10721 countreg
= ix86_zero_extend_to_Pmode (countreg
);
10725 emit_insn (gen_rep_stossi_rex64 (destreg
, countreg
, zeroreg
,
10726 destreg
, countreg
));
10728 emit_insn (gen_rep_stossi (destreg
, countreg
, zeroreg
,
10729 destreg
, countreg
));
10732 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg
, zeroreg
,
10733 destreg
, countreg
));
10735 if (size
== 8 && (count
& 0x04))
10736 emit_insn (gen_strsetsi (destreg
,
10737 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10739 emit_insn (gen_strsethi (destreg
,
10740 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10742 emit_insn (gen_strsetqi (destreg
,
10743 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10749 /* Compute desired alignment of the string operation. */
10750 int desired_alignment
= (TARGET_PENTIUMPRO
10751 && (count
== 0 || count
>= (unsigned int) 260)
10752 ? 8 : UNITS_PER_WORD
);
10754 /* In case we don't know anything about the alignment, default to
10755 library version, since it is usually equally fast and result in
10757 if (!TARGET_INLINE_ALL_STRINGOPS
&& align
< UNITS_PER_WORD
)
10760 if (TARGET_SINGLE_STRINGOP
)
10761 emit_insn (gen_cld ());
10763 countreg2
= gen_reg_rtx (Pmode
);
10764 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
10765 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
10767 if (count
== 0 && align
< desired_alignment
)
10769 label
= gen_label_rtx ();
10770 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
10771 LEU
, 0, counter_mode
, 1, label
);
10775 rtx label
= ix86_expand_aligntest (destreg
, 1);
10776 emit_insn (gen_strsetqi (destreg
,
10777 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10778 ix86_adjust_counter (countreg
, 1);
10779 emit_label (label
);
10780 LABEL_NUSES (label
) = 1;
10784 rtx label
= ix86_expand_aligntest (destreg
, 2);
10785 emit_insn (gen_strsethi (destreg
,
10786 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10787 ix86_adjust_counter (countreg
, 2);
10788 emit_label (label
);
10789 LABEL_NUSES (label
) = 1;
10791 if (align
<= 4 && desired_alignment
> 4)
10793 rtx label
= ix86_expand_aligntest (destreg
, 4);
10794 emit_insn (gen_strsetsi (destreg
, (TARGET_64BIT
10795 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
10797 ix86_adjust_counter (countreg
, 4);
10798 emit_label (label
);
10799 LABEL_NUSES (label
) = 1;
10802 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
10804 emit_label (label
);
10805 LABEL_NUSES (label
) = 1;
10809 if (!TARGET_SINGLE_STRINGOP
)
10810 emit_insn (gen_cld ());
10813 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
10815 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg2
, zeroreg
,
10816 destreg
, countreg2
));
10820 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
10821 emit_insn (gen_rep_stossi (destreg
, countreg2
, zeroreg
,
10822 destreg
, countreg2
));
10826 emit_label (label
);
10827 LABEL_NUSES (label
) = 1;
10830 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
10831 emit_insn (gen_strsetsi (destreg
,
10832 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10833 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
10835 rtx label
= ix86_expand_aligntest (countreg
, 4);
10836 emit_insn (gen_strsetsi (destreg
,
10837 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10838 emit_label (label
);
10839 LABEL_NUSES (label
) = 1;
10841 if (align
> 2 && count
!= 0 && (count
& 2))
10842 emit_insn (gen_strsethi (destreg
,
10843 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10844 if (align
<= 2 || count
== 0)
10846 rtx label
= ix86_expand_aligntest (countreg
, 2);
10847 emit_insn (gen_strsethi (destreg
,
10848 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10849 emit_label (label
);
10850 LABEL_NUSES (label
) = 1;
10852 if (align
> 1 && count
!= 0 && (count
& 1))
10853 emit_insn (gen_strsetqi (destreg
,
10854 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10855 if (align
<= 1 || count
== 0)
10857 rtx label
= ix86_expand_aligntest (countreg
, 1);
10858 emit_insn (gen_strsetqi (destreg
,
10859 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10860 emit_label (label
);
10861 LABEL_NUSES (label
) = 1;
10866 /* Expand strlen. */
10868 ix86_expand_strlen (out
, src
, eoschar
, align
)
10869 rtx out
, src
, eoschar
, align
;
10871 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
10873 /* The generic case of strlen expander is long. Avoid it's
10874 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
10876 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
10877 && !TARGET_INLINE_ALL_STRINGOPS
10879 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
10882 addr
= force_reg (Pmode
, XEXP (src
, 0));
10883 scratch1
= gen_reg_rtx (Pmode
);
10885 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
10888 /* Well it seems that some optimizer does not combine a call like
10889 foo(strlen(bar), strlen(bar));
10890 when the move and the subtraction is done here. It does calculate
10891 the length just once when these instructions are done inside of
10892 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10893 often used and I use one fewer register for the lifetime of
10894 output_strlen_unroll() this is better. */
10896 emit_move_insn (out
, addr
);
10898 ix86_expand_strlensi_unroll_1 (out
, align
);
10900 /* strlensi_unroll_1 returns the address of the zero at the end of
10901 the string, like memchr(), so compute the length by subtracting
10902 the start address. */
10904 emit_insn (gen_subdi3 (out
, out
, addr
));
10906 emit_insn (gen_subsi3 (out
, out
, addr
));
10910 scratch2
= gen_reg_rtx (Pmode
);
10911 scratch3
= gen_reg_rtx (Pmode
);
10912 scratch4
= force_reg (Pmode
, constm1_rtx
);
10914 emit_move_insn (scratch3
, addr
);
10915 eoschar
= force_reg (QImode
, eoschar
);
10917 emit_insn (gen_cld ());
10920 emit_insn (gen_strlenqi_rex_1 (scratch1
, scratch3
, eoschar
,
10921 align
, scratch4
, scratch3
));
10922 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
10923 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
10927 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, eoschar
,
10928 align
, scratch4
, scratch3
));
10929 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
10930 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
10936 /* Expand the appropriate insns for doing strlen if not just doing
10939 out = result, initialized with the start address
10940 align_rtx = alignment of the address.
10941 scratch = scratch register, initialized with the startaddress when
10942 not aligned, otherwise undefined
10944 This is just the body. It needs the initialisations mentioned above and
10945 some address computing at the end. These things are done in i386.md. */
10948 ix86_expand_strlensi_unroll_1 (out
, align_rtx
)
10949 rtx out
, align_rtx
;
10953 rtx align_2_label
= NULL_RTX
;
10954 rtx align_3_label
= NULL_RTX
;
10955 rtx align_4_label
= gen_label_rtx ();
10956 rtx end_0_label
= gen_label_rtx ();
10958 rtx tmpreg
= gen_reg_rtx (SImode
);
10959 rtx scratch
= gen_reg_rtx (SImode
);
10962 if (GET_CODE (align_rtx
) == CONST_INT
)
10963 align
= INTVAL (align_rtx
);
10965 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10967 /* Is there a known alignment and is it less than 4? */
10970 rtx scratch1
= gen_reg_rtx (Pmode
);
10971 emit_move_insn (scratch1
, out
);
10972 /* Is there a known alignment and is it not 2? */
10975 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
10976 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
10978 /* Leave just the 3 lower bits. */
10979 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
10980 NULL_RTX
, 0, OPTAB_WIDEN
);
10982 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
10983 Pmode
, 1, align_4_label
);
10984 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), EQ
, NULL
,
10985 Pmode
, 1, align_2_label
);
10986 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), GTU
, NULL
,
10987 Pmode
, 1, align_3_label
);
10991 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10992 check if is aligned to 4 - byte. */
10994 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (2),
10995 NULL_RTX
, 0, OPTAB_WIDEN
);
10997 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
10998 Pmode
, 1, align_4_label
);
11001 mem
= gen_rtx_MEM (QImode
, out
);
11003 /* Now compare the bytes. */
11005 /* Compare the first n unaligned byte on a byte per byte basis. */
11006 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
11007 QImode
, 1, end_0_label
);
11009 /* Increment the address. */
11011 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11013 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11015 /* Not needed with an alignment of 2 */
11018 emit_label (align_2_label
);
11020 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
11024 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11026 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11028 emit_label (align_3_label
);
11031 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
11035 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11037 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11040 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11041 align this loop. It gives only huge programs, but does not help to
11043 emit_label (align_4_label
);
11045 mem
= gen_rtx_MEM (SImode
, out
);
11046 emit_move_insn (scratch
, mem
);
11048 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
11050 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
11052 /* This formula yields a nonzero result iff one of the bytes is zero.
11053 This saves three branches inside loop and many cycles. */
11055 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
11056 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
11057 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
11058 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
11059 gen_int_mode (0x80808080, SImode
)));
11060 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
11065 rtx reg
= gen_reg_rtx (SImode
);
11066 rtx reg2
= gen_reg_rtx (Pmode
);
11067 emit_move_insn (reg
, tmpreg
);
11068 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
11070 /* If zero is not in the first two bytes, move two bytes forward. */
11071 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
11072 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
11073 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
11074 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
11075 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
11078 /* Emit lea manually to avoid clobbering of flags. */
11079 emit_insn (gen_rtx_SET (SImode
, reg2
,
11080 gen_rtx_PLUS (Pmode
, out
, GEN_INT (2))));
11082 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
11083 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
11084 emit_insn (gen_rtx_SET (VOIDmode
, out
,
11085 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
11092 rtx end_2_label
= gen_label_rtx ();
11093 /* Is zero in the first two bytes? */
11095 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
11096 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
11097 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
11098 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11099 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
11101 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11102 JUMP_LABEL (tmp
) = end_2_label
;
11104 /* Not in the first two. Move two bytes forward. */
11105 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
11107 emit_insn (gen_adddi3 (out
, out
, GEN_INT (2)));
11109 emit_insn (gen_addsi3 (out
, out
, GEN_INT (2)));
11111 emit_label (end_2_label
);
11115 /* Avoid branch in fixing the byte. */
11116 tmpreg
= gen_lowpart (QImode
, tmpreg
);
11117 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
11119 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3)));
11121 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3)));
11123 emit_label (end_0_label
);
11127 ix86_expand_call (retval
, fnaddr
, callarg1
, callarg2
, pop
)
11128 rtx retval
, fnaddr
, callarg1
, callarg2
, pop
;
11130 rtx use
= NULL
, call
;
11132 if (pop
== const0_rtx
)
11134 if (TARGET_64BIT
&& pop
)
11138 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
11139 fnaddr
= machopic_indirect_call_target (fnaddr
);
11141 /* Static functions and indirect calls don't need the pic register. */
11142 if (! TARGET_64BIT
&& flag_pic
11143 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
11144 && ! SYMBOL_REF_FLAG (XEXP (fnaddr
, 0)))
11145 use_reg (&use
, pic_offset_table_rtx
);
11147 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
11149 rtx al
= gen_rtx_REG (QImode
, 0);
11150 emit_move_insn (al
, callarg2
);
11151 use_reg (&use
, al
);
11153 #endif /* TARGET_MACHO */
11155 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
11157 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
11158 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
11161 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
11163 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
11166 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
11167 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
11168 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
11171 call
= emit_call_insn (call
);
11173 CALL_INSN_FUNCTION_USAGE (call
) = use
;
11177 /* Clear stack slot assignments remembered from previous functions.
11178 This is called from INIT_EXPANDERS once before RTL is emitted for each
11181 static struct machine_function
*
11182 ix86_init_machine_status ()
11184 return ggc_alloc_cleared (sizeof (struct machine_function
));
11187 /* Return a MEM corresponding to a stack slot with mode MODE.
11188 Allocate a new slot if necessary.
11190 The RTL for a function can have several slots available: N is
11191 which slot to use. */
11194 assign_386_stack_local (mode
, n
)
11195 enum machine_mode mode
;
11198 if (n
< 0 || n
>= MAX_386_STACK_LOCALS
)
11201 if (ix86_stack_locals
[(int) mode
][n
] == NULL_RTX
)
11202 ix86_stack_locals
[(int) mode
][n
]
11203 = assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
11205 return ix86_stack_locals
[(int) mode
][n
];
11208 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11210 static GTY(()) rtx ix86_tls_symbol
;
11212 ix86_tls_get_addr ()
11215 if (!ix86_tls_symbol
)
11217 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
11218 (TARGET_GNU_TLS
&& !TARGET_64BIT
)
11219 ? "___tls_get_addr"
11220 : "__tls_get_addr");
11223 return ix86_tls_symbol
;
11226 /* Calculate the length of the memory address in the instruction
11227 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11230 memory_address_length (addr
)
11233 struct ix86_address parts
;
11234 rtx base
, index
, disp
;
11237 if (GET_CODE (addr
) == PRE_DEC
11238 || GET_CODE (addr
) == POST_INC
11239 || GET_CODE (addr
) == PRE_MODIFY
11240 || GET_CODE (addr
) == POST_MODIFY
)
11243 if (! ix86_decompose_address (addr
, &parts
))
11247 index
= parts
.index
;
11252 - esp as the base always wants an index,
11253 - ebp as the base always wants a displacement. */
11255 /* Register Indirect. */
11256 if (base
&& !index
&& !disp
)
11258 /* esp (for its index) and ebp (for its displacement) need
11259 the two-byte modrm form. */
11260 if (addr
== stack_pointer_rtx
11261 || addr
== arg_pointer_rtx
11262 || addr
== frame_pointer_rtx
11263 || addr
== hard_frame_pointer_rtx
)
11267 /* Direct Addressing. */
11268 else if (disp
&& !base
&& !index
)
11273 /* Find the length of the displacement constant. */
11276 if (GET_CODE (disp
) == CONST_INT
11277 && CONST_OK_FOR_LETTER_P (INTVAL (disp
), 'K')
11283 /* ebp always wants a displacement. */
11284 else if (base
== hard_frame_pointer_rtx
)
11287 /* An index requires the two-byte modrm form... */
11289 /* ...like esp, which always wants an index. */
11290 || base
== stack_pointer_rtx
11291 || base
== arg_pointer_rtx
11292 || base
== frame_pointer_rtx
)
11299 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11300 is set, expect that insn have 8bit immediate alternative. */
11302 ix86_attr_length_immediate_default (insn
, shortform
)
11308 extract_insn_cached (insn
);
11309 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11310 if (CONSTANT_P (recog_data
.operand
[i
]))
11315 && GET_CODE (recog_data
.operand
[i
]) == CONST_INT
11316 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data
.operand
[i
]), 'K'))
11320 switch (get_attr_mode (insn
))
11331 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11336 fatal_insn ("unknown insn mode", insn
);
11342 /* Compute default value for "length_address" attribute. */
11344 ix86_attr_length_address_default (insn
)
11349 if (get_attr_type (insn
) == TYPE_LEA
)
11351 rtx set
= PATTERN (insn
);
11352 if (GET_CODE (set
) == SET
)
11354 else if (GET_CODE (set
) == PARALLEL
11355 && GET_CODE (XVECEXP (set
, 0, 0)) == SET
)
11356 set
= XVECEXP (set
, 0, 0);
11359 #ifdef ENABLE_CHECKING
11365 return memory_address_length (SET_SRC (set
));
11368 extract_insn_cached (insn
);
11369 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11370 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
11372 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
11378 /* Return the maximum number of instructions a cpu can issue. */
11385 case PROCESSOR_PENTIUM
:
11389 case PROCESSOR_PENTIUMPRO
:
11390 case PROCESSOR_PENTIUM4
:
11391 case PROCESSOR_ATHLON
:
11399 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11400 by DEP_INSN and nothing set by DEP_INSN. */
11403 ix86_flags_dependant (insn
, dep_insn
, insn_type
)
11404 rtx insn
, dep_insn
;
11405 enum attr_type insn_type
;
11409 /* Simplify the test for uninteresting insns. */
11410 if (insn_type
!= TYPE_SETCC
11411 && insn_type
!= TYPE_ICMOV
11412 && insn_type
!= TYPE_FCMOV
11413 && insn_type
!= TYPE_IBR
)
11416 if ((set
= single_set (dep_insn
)) != 0)
11418 set
= SET_DEST (set
);
11421 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
11422 && XVECLEN (PATTERN (dep_insn
), 0) == 2
11423 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
11424 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
11426 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
11427 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
11432 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
11435 /* This test is true if the dependent insn reads the flags but
11436 not any other potentially set register. */
11437 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
11440 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
11446 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11447 address with operands set by DEP_INSN. */
11450 ix86_agi_dependant (insn
, dep_insn
, insn_type
)
11451 rtx insn
, dep_insn
;
11452 enum attr_type insn_type
;
11456 if (insn_type
== TYPE_LEA
11459 addr
= PATTERN (insn
);
11460 if (GET_CODE (addr
) == SET
)
11462 else if (GET_CODE (addr
) == PARALLEL
11463 && GET_CODE (XVECEXP (addr
, 0, 0)) == SET
)
11464 addr
= XVECEXP (addr
, 0, 0);
11467 addr
= SET_SRC (addr
);
11472 extract_insn_cached (insn
);
11473 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11474 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
11476 addr
= XEXP (recog_data
.operand
[i
], 0);
11483 return modified_in_p (addr
, dep_insn
);
11487 ix86_adjust_cost (insn
, link
, dep_insn
, cost
)
11488 rtx insn
, link
, dep_insn
;
11491 enum attr_type insn_type
, dep_insn_type
;
11492 enum attr_memory memory
, dep_memory
;
11494 int dep_insn_code_number
;
11496 /* Anti and output depenancies have zero cost on all CPUs. */
11497 if (REG_NOTE_KIND (link
) != 0)
11500 dep_insn_code_number
= recog_memoized (dep_insn
);
11502 /* If we can't recognize the insns, we can't really do anything. */
11503 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
11506 insn_type
= get_attr_type (insn
);
11507 dep_insn_type
= get_attr_type (dep_insn
);
11511 case PROCESSOR_PENTIUM
:
11512 /* Address Generation Interlock adds a cycle of latency. */
11513 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11516 /* ??? Compares pair with jump/setcc. */
11517 if (ix86_flags_dependant (insn
, dep_insn
, insn_type
))
11520 /* Floating point stores require value to be ready one cycle ealier. */
11521 if (insn_type
== TYPE_FMOV
11522 && get_attr_memory (insn
) == MEMORY_STORE
11523 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11527 case PROCESSOR_PENTIUMPRO
:
11528 memory
= get_attr_memory (insn
);
11529 dep_memory
= get_attr_memory (dep_insn
);
11531 /* Since we can't represent delayed latencies of load+operation,
11532 increase the cost here for non-imov insns. */
11533 if (dep_insn_type
!= TYPE_IMOV
11534 && dep_insn_type
!= TYPE_FMOV
11535 && (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
))
11538 /* INT->FP conversion is expensive. */
11539 if (get_attr_fp_int_src (dep_insn
))
11542 /* There is one cycle extra latency between an FP op and a store. */
11543 if (insn_type
== TYPE_FMOV
11544 && (set
= single_set (dep_insn
)) != NULL_RTX
11545 && (set2
= single_set (insn
)) != NULL_RTX
11546 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
11547 && GET_CODE (SET_DEST (set2
)) == MEM
)
11550 /* Show ability of reorder buffer to hide latency of load by executing
11551 in parallel with previous instruction in case
11552 previous instruction is not needed to compute the address. */
11553 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11554 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11556 /* Claim moves to take one cycle, as core can issue one load
11557 at time and the next load can start cycle later. */
11558 if (dep_insn_type
== TYPE_IMOV
11559 || dep_insn_type
== TYPE_FMOV
)
11567 memory
= get_attr_memory (insn
);
11568 dep_memory
= get_attr_memory (dep_insn
);
11569 /* The esp dependency is resolved before the instruction is really
11571 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
11572 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
11575 /* Since we can't represent delayed latencies of load+operation,
11576 increase the cost here for non-imov insns. */
11577 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
11578 cost
+= (dep_insn_type
!= TYPE_IMOV
) ? 2 : 1;
11580 /* INT->FP conversion is expensive. */
11581 if (get_attr_fp_int_src (dep_insn
))
11584 /* Show ability of reorder buffer to hide latency of load by executing
11585 in parallel with previous instruction in case
11586 previous instruction is not needed to compute the address. */
11587 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11588 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11590 /* Claim moves to take one cycle, as core can issue one load
11591 at time and the next load can start cycle later. */
11592 if (dep_insn_type
== TYPE_IMOV
11593 || dep_insn_type
== TYPE_FMOV
)
11602 case PROCESSOR_ATHLON
:
11603 memory
= get_attr_memory (insn
);
11604 dep_memory
= get_attr_memory (dep_insn
);
11606 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
11608 if (dep_insn_type
== TYPE_IMOV
|| dep_insn_type
== TYPE_FMOV
)
11613 /* Show ability of reorder buffer to hide latency of load by executing
11614 in parallel with previous instruction in case
11615 previous instruction is not needed to compute the address. */
11616 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11617 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11619 /* Claim moves to take one cycle, as core can issue one load
11620 at time and the next load can start cycle later. */
11621 if (dep_insn_type
== TYPE_IMOV
11622 || dep_insn_type
== TYPE_FMOV
)
11624 else if (cost
>= 3)
11639 struct ppro_sched_data
11642 int issued_this_cycle
;
11646 static enum attr_ppro_uops
11647 ix86_safe_ppro_uops (insn
)
11650 if (recog_memoized (insn
) >= 0)
11651 return get_attr_ppro_uops (insn
);
11653 return PPRO_UOPS_MANY
;
11657 ix86_dump_ppro_packet (dump
)
11660 if (ix86_sched_data
.ppro
.decode
[0])
11662 fprintf (dump
, "PPRO packet: %d",
11663 INSN_UID (ix86_sched_data
.ppro
.decode
[0]));
11664 if (ix86_sched_data
.ppro
.decode
[1])
11665 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[1]));
11666 if (ix86_sched_data
.ppro
.decode
[2])
11667 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[2]));
11668 fputc ('\n', dump
);
11672 /* We're beginning a new block. Initialize data structures as necessary. */
11675 ix86_sched_init (dump
, sched_verbose
, veclen
)
11676 FILE *dump ATTRIBUTE_UNUSED
;
11677 int sched_verbose ATTRIBUTE_UNUSED
;
11678 int veclen ATTRIBUTE_UNUSED
;
11680 memset (&ix86_sched_data
, 0, sizeof (ix86_sched_data
));
11683 /* Shift INSN to SLOT, and shift everything else down. */
11686 ix86_reorder_insn (insnp
, slot
)
11693 insnp
[0] = insnp
[1];
11694 while (++insnp
!= slot
);
11700 ix86_sched_reorder_ppro (ready
, e_ready
)
11705 enum attr_ppro_uops cur_uops
;
11706 int issued_this_cycle
;
11710 /* At this point .ppro.decode contains the state of the three
11711 decoders from last "cycle". That is, those insns that were
11712 actually independent. But here we're scheduling for the
11713 decoder, and we may find things that are decodable in the
11716 memcpy (decode
, ix86_sched_data
.ppro
.decode
, sizeof (decode
));
11717 issued_this_cycle
= 0;
11720 cur_uops
= ix86_safe_ppro_uops (*insnp
);
11722 /* If the decoders are empty, and we've a complex insn at the
11723 head of the priority queue, let it issue without complaint. */
11724 if (decode
[0] == NULL
)
11726 if (cur_uops
== PPRO_UOPS_MANY
)
11728 decode
[0] = *insnp
;
11732 /* Otherwise, search for a 2-4 uop unsn to issue. */
11733 while (cur_uops
!= PPRO_UOPS_FEW
)
11735 if (insnp
== ready
)
11737 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
11740 /* If so, move it to the head of the line. */
11741 if (cur_uops
== PPRO_UOPS_FEW
)
11742 ix86_reorder_insn (insnp
, e_ready
);
11744 /* Issue the head of the queue. */
11745 issued_this_cycle
= 1;
11746 decode
[0] = *e_ready
--;
11749 /* Look for simple insns to fill in the other two slots. */
11750 for (i
= 1; i
< 3; ++i
)
11751 if (decode
[i
] == NULL
)
11753 if (ready
> e_ready
)
11757 cur_uops
= ix86_safe_ppro_uops (*insnp
);
11758 while (cur_uops
!= PPRO_UOPS_ONE
)
11760 if (insnp
== ready
)
11762 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
11765 /* Found one. Move it to the head of the queue and issue it. */
11766 if (cur_uops
== PPRO_UOPS_ONE
)
11768 ix86_reorder_insn (insnp
, e_ready
);
11769 decode
[i
] = *e_ready
--;
11770 issued_this_cycle
++;
11774 /* ??? Didn't find one. Ideally, here we would do a lazy split
11775 of 2-uop insns, issue one and queue the other. */
11779 if (issued_this_cycle
== 0)
11780 issued_this_cycle
= 1;
11781 ix86_sched_data
.ppro
.issued_this_cycle
= issued_this_cycle
;
11784 /* We are about to being issuing insns for this clock cycle.
11785 Override the default sort algorithm to better slot instructions. */
11787 ix86_sched_reorder (dump
, sched_verbose
, ready
, n_readyp
, clock_var
)
11788 FILE *dump ATTRIBUTE_UNUSED
;
11789 int sched_verbose ATTRIBUTE_UNUSED
;
11792 int clock_var ATTRIBUTE_UNUSED
;
11794 int n_ready
= *n_readyp
;
11795 rtx
*e_ready
= ready
+ n_ready
- 1;
11797 /* Make sure to go ahead and initialize key items in
11798 ix86_sched_data if we are not going to bother trying to
11799 reorder the ready queue. */
11802 ix86_sched_data
.ppro
.issued_this_cycle
= 1;
11811 case PROCESSOR_PENTIUMPRO
:
11812 ix86_sched_reorder_ppro (ready
, e_ready
);
11817 return ix86_issue_rate ();
11820 /* We are about to issue INSN. Return the number of insns left on the
11821 ready queue that can be issued this cycle. */
11824 ix86_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
)
11828 int can_issue_more
;
11834 return can_issue_more
- 1;
11836 case PROCESSOR_PENTIUMPRO
:
11838 enum attr_ppro_uops uops
= ix86_safe_ppro_uops (insn
);
11840 if (uops
== PPRO_UOPS_MANY
)
11843 ix86_dump_ppro_packet (dump
);
11844 ix86_sched_data
.ppro
.decode
[0] = insn
;
11845 ix86_sched_data
.ppro
.decode
[1] = NULL
;
11846 ix86_sched_data
.ppro
.decode
[2] = NULL
;
11848 ix86_dump_ppro_packet (dump
);
11849 ix86_sched_data
.ppro
.decode
[0] = NULL
;
11851 else if (uops
== PPRO_UOPS_FEW
)
11854 ix86_dump_ppro_packet (dump
);
11855 ix86_sched_data
.ppro
.decode
[0] = insn
;
11856 ix86_sched_data
.ppro
.decode
[1] = NULL
;
11857 ix86_sched_data
.ppro
.decode
[2] = NULL
;
11861 for (i
= 0; i
< 3; ++i
)
11862 if (ix86_sched_data
.ppro
.decode
[i
] == NULL
)
11864 ix86_sched_data
.ppro
.decode
[i
] = insn
;
11872 ix86_dump_ppro_packet (dump
);
11873 ix86_sched_data
.ppro
.decode
[0] = NULL
;
11874 ix86_sched_data
.ppro
.decode
[1] = NULL
;
11875 ix86_sched_data
.ppro
.decode
[2] = NULL
;
11879 return --ix86_sched_data
.ppro
.issued_this_cycle
;
11884 ia32_use_dfa_pipeline_interface ()
11886 if (ix86_cpu
== PROCESSOR_PENTIUM
)
11891 /* How many alternative schedules to try. This should be as wide as the
11892 scheduling freedom in the DFA, but no wider. Making this value too
11893 large results extra work for the scheduler. */
11896 ia32_multipass_dfa_lookahead ()
11898 if (ix86_cpu
== PROCESSOR_PENTIUM
)
11905 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11906 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11910 ix86_set_move_mem_attrs (insns
, dstref
, srcref
, dstreg
, srcreg
)
11912 rtx dstref
, srcref
, dstreg
, srcreg
;
11916 for (insn
= insns
; insn
!= 0 ; insn
= NEXT_INSN (insn
))
11918 ix86_set_move_mem_attrs_1 (PATTERN (insn
), dstref
, srcref
,
11922 /* Subroutine of above to actually do the updating by recursively walking
11926 ix86_set_move_mem_attrs_1 (x
, dstref
, srcref
, dstreg
, srcreg
)
11928 rtx dstref
, srcref
, dstreg
, srcreg
;
11930 enum rtx_code code
= GET_CODE (x
);
11931 const char *format_ptr
= GET_RTX_FORMAT (code
);
11934 if (code
== MEM
&& XEXP (x
, 0) == dstreg
)
11935 MEM_COPY_ATTRIBUTES (x
, dstref
);
11936 else if (code
== MEM
&& XEXP (x
, 0) == srcreg
)
11937 MEM_COPY_ATTRIBUTES (x
, srcref
);
11939 for (i
= 0; i
< GET_RTX_LENGTH (code
); i
++, format_ptr
++)
11941 if (*format_ptr
== 'e')
11942 ix86_set_move_mem_attrs_1 (XEXP (x
, i
), dstref
, srcref
,
11944 else if (*format_ptr
== 'E')
11945 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
11946 ix86_set_move_mem_attrs_1 (XVECEXP (x
, i
, j
), dstref
, srcref
,
11951 /* Compute the alignment given to a constant that is being placed in memory.
11952 EXP is the constant and ALIGN is the alignment that the object would
11954 The value of this function is used instead of that alignment to align
11958 ix86_constant_alignment (exp
, align
)
11962 if (TREE_CODE (exp
) == REAL_CST
)
11964 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
11966 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
11969 else if (TREE_CODE (exp
) == STRING_CST
&& TREE_STRING_LENGTH (exp
) >= 31
11976 /* Compute the alignment for a static variable.
11977 TYPE is the data type, and ALIGN is the alignment that
11978 the object would ordinarily have. The value of this function is used
11979 instead of that alignment to align the object. */
11982 ix86_data_alignment (type
, align
)
11986 if (AGGREGATE_TYPE_P (type
)
11987 && TYPE_SIZE (type
)
11988 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11989 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 256
11990 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 256)
11993 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11994 to 16byte boundary. */
11997 if (AGGREGATE_TYPE_P (type
)
11998 && TYPE_SIZE (type
)
11999 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
12000 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
12001 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
12005 if (TREE_CODE (type
) == ARRAY_TYPE
)
12007 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
12009 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
12012 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
12015 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
12017 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
12020 else if ((TREE_CODE (type
) == RECORD_TYPE
12021 || TREE_CODE (type
) == UNION_TYPE
12022 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
12023 && TYPE_FIELDS (type
))
12025 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
12027 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
12030 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
12031 || TREE_CODE (type
) == INTEGER_TYPE
)
12033 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
12035 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
12042 /* Compute the alignment for a local variable.
12043 TYPE is the data type, and ALIGN is the alignment that
12044 the object would ordinarily have. The value of this macro is used
12045 instead of that alignment to align the object. */
12048 ix86_local_alignment (type
, align
)
12052 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12053 to 16byte boundary. */
12056 if (AGGREGATE_TYPE_P (type
)
12057 && TYPE_SIZE (type
)
12058 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
12059 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
12060 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
12063 if (TREE_CODE (type
) == ARRAY_TYPE
)
12065 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
12067 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
12070 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
12072 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
12074 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
12077 else if ((TREE_CODE (type
) == RECORD_TYPE
12078 || TREE_CODE (type
) == UNION_TYPE
12079 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
12080 && TYPE_FIELDS (type
))
12082 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
12084 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
12087 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
12088 || TREE_CODE (type
) == INTEGER_TYPE
)
12091 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
12093 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
12099 /* Emit RTL insns to initialize the variable parts of a trampoline.
12100 FNADDR is an RTX for the address of the function's pure code.
12101 CXT is an RTX for the static chain value for the function. */
12103 x86_initialize_trampoline (tramp
, fnaddr
, cxt
)
12104 rtx tramp
, fnaddr
, cxt
;
12108 /* Compute offset from the end of the jmp to the target function. */
12109 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
12110 plus_constant (tramp
, 10),
12111 NULL_RTX
, 1, OPTAB_DIRECT
);
12112 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
12113 gen_int_mode (0xb9, QImode
));
12114 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
12115 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
12116 gen_int_mode (0xe9, QImode
));
12117 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
12122 /* Try to load address using shorter movl instead of movabs.
12123 We may want to support movq for kernel mode, but kernel does not use
12124 trampolines at the moment. */
12125 if (x86_64_zero_extended_value (fnaddr
))
12127 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
12128 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12129 gen_int_mode (0xbb41, HImode
));
12130 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
12131 gen_lowpart (SImode
, fnaddr
));
12136 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12137 gen_int_mode (0xbb49, HImode
));
12138 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
12142 /* Load static chain using movabs to r10. */
12143 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12144 gen_int_mode (0xba49, HImode
));
12145 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
12148 /* Jump to the r11 */
12149 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12150 gen_int_mode (0xff49, HImode
));
12151 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
12152 gen_int_mode (0xe3, QImode
));
12154 if (offset
> TRAMPOLINE_SIZE
)
12158 #ifdef TRANSFER_FROM_TRAMPOLINE
12159 emit_library_call (gen_rtx (SYMBOL_REF
, Pmode
, "__enable_execute_stack"),
12160 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
12164 #define def_builtin(MASK, NAME, TYPE, CODE) \
12166 if ((MASK) & target_flags \
12167 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12168 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12169 NULL, NULL_TREE); \
12172 struct builtin_description
12174 const unsigned int mask
;
12175 const enum insn_code icode
;
12176 const char *const name
;
12177 const enum ix86_builtins code
;
12178 const enum rtx_code comparison
;
12179 const unsigned int flag
;
12182 /* Used for builtins that are enabled both by -msse and -msse2. */
12183 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12184 #define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT)
12185 #define MASK_SSE264 (MASK_SSE2 | MASK_64BIT)
12187 static const struct builtin_description bdesc_comi
[] =
12189 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
12190 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
12191 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
12192 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
12193 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
12194 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
12195 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
12196 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
12197 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
12198 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
12199 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
12200 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
12201 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
12202 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
12203 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
12204 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
12205 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
12206 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
12207 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
12208 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
12209 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
12210 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
12211 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
12212 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
12215 static const struct builtin_description bdesc_2arg
[] =
12218 { MASK_SSE1
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
12219 { MASK_SSE1
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
12220 { MASK_SSE1
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
12221 { MASK_SSE1
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
12222 { MASK_SSE1
, CODE_FOR_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
12223 { MASK_SSE1
, CODE_FOR_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
12224 { MASK_SSE1
, CODE_FOR_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
12225 { MASK_SSE1
, CODE_FOR_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
12227 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
12228 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
12229 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
12230 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, 1 },
12231 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, 1 },
12232 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
12233 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, EQ
, 0 },
12234 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, LT
, 0 },
12235 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, LE
, 0 },
12236 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, LT
, 1 },
12237 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, LE
, 1 },
12238 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, UNORDERED
, 0 },
12239 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
12240 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
12241 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
12242 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
12243 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, EQ
, 0 },
12244 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, LT
, 0 },
12245 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, LE
, 0 },
12246 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
12248 { MASK_SSE1
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
12249 { MASK_SSE1
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
12250 { MASK_SSE1
, CODE_FOR_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
12251 { MASK_SSE1
, CODE_FOR_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
12253 { MASK_SSE1
, CODE_FOR_sse_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
12254 { MASK_SSE1
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
12255 { MASK_SSE1
, CODE_FOR_sse_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
12256 { MASK_SSE1
, CODE_FOR_sse_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
12258 { MASK_SSE1
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
12259 { MASK_SSE1
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
12260 { MASK_SSE1
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
12261 { MASK_SSE1
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
12262 { MASK_SSE1
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
12265 { MASK_MMX
, CODE_FOR_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
12266 { MASK_MMX
, CODE_FOR_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
12267 { MASK_MMX
, CODE_FOR_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
12268 { MASK_MMX
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
12269 { MASK_MMX
, CODE_FOR_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
12270 { MASK_MMX
, CODE_FOR_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
12271 { MASK_MMX
, CODE_FOR_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
12272 { MASK_MMX
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
12274 { MASK_MMX
, CODE_FOR_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
12275 { MASK_MMX
, CODE_FOR_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
12276 { MASK_MMX
, CODE_FOR_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
12277 { MASK_MMX
, CODE_FOR_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
12278 { MASK_MMX
, CODE_FOR_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
12279 { MASK_MMX
, CODE_FOR_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
12280 { MASK_MMX
, CODE_FOR_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
12281 { MASK_MMX
, CODE_FOR_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
12283 { MASK_MMX
, CODE_FOR_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
12284 { MASK_MMX
, CODE_FOR_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
12285 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
12287 { MASK_MMX
, CODE_FOR_mmx_anddi3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
12288 { MASK_MMX
, CODE_FOR_mmx_nanddi3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
12289 { MASK_MMX
, CODE_FOR_mmx_iordi3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
12290 { MASK_MMX
, CODE_FOR_mmx_xordi3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
12292 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
12293 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
12295 { MASK_MMX
, CODE_FOR_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
12296 { MASK_MMX
, CODE_FOR_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
12297 { MASK_MMX
, CODE_FOR_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
12298 { MASK_MMX
, CODE_FOR_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
12299 { MASK_MMX
, CODE_FOR_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
12300 { MASK_MMX
, CODE_FOR_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
12302 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
12303 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
12304 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
12305 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
12307 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
12308 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
12309 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
12310 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
12311 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
12312 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
12315 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
12316 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
12317 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
12319 { MASK_SSE1
, CODE_FOR_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
12320 { MASK_SSE1
, CODE_FOR_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
12321 { MASK_SSE164
, CODE_FOR_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
12323 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
12324 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
12325 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
12326 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
12327 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
12328 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
12330 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
12331 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
12332 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
12333 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
12334 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
12335 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
12337 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
12338 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
12339 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
12340 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
12342 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
12343 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
12346 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
12347 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
12348 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
12349 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
12350 { MASK_SSE2
, CODE_FOR_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
12351 { MASK_SSE2
, CODE_FOR_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
12352 { MASK_SSE2
, CODE_FOR_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
12353 { MASK_SSE2
, CODE_FOR_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
12355 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
12356 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
12357 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
12358 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, 1 },
12359 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, 1 },
12360 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
12361 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, EQ
, 0 },
12362 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, LT
, 0 },
12363 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, LE
, 0 },
12364 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, LT
, 1 },
12365 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, LE
, 1 },
12366 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, UNORDERED
, 0 },
12367 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
12368 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
12369 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
12370 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
12371 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, EQ
, 0 },
12372 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, LT
, 0 },
12373 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, LE
, 0 },
12374 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, UNORDERED
, 0 },
12376 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
12377 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
12378 { MASK_SSE2
, CODE_FOR_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
12379 { MASK_SSE2
, CODE_FOR_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
12381 { MASK_SSE2
, CODE_FOR_sse2_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
12382 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
12383 { MASK_SSE2
, CODE_FOR_sse2_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
12384 { MASK_SSE2
, CODE_FOR_sse2_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
12386 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
12387 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
12388 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
12391 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
12392 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
12393 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
12394 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
12395 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
12396 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
12397 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
12398 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
12400 { MASK_MMX
, CODE_FOR_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
12401 { MASK_MMX
, CODE_FOR_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
12402 { MASK_MMX
, CODE_FOR_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
12403 { MASK_MMX
, CODE_FOR_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
12404 { MASK_MMX
, CODE_FOR_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
12405 { MASK_MMX
, CODE_FOR_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
12406 { MASK_MMX
, CODE_FOR_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
12407 { MASK_MMX
, CODE_FOR_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
12409 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
12410 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
12411 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, 0, 0 },
12412 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, 0, 0 },
12414 { MASK_SSE2
, CODE_FOR_sse2_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
12415 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
12416 { MASK_SSE2
, CODE_FOR_sse2_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
12417 { MASK_SSE2
, CODE_FOR_sse2_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
12419 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
12420 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
12422 { MASK_SSE2
, CODE_FOR_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
12423 { MASK_SSE2
, CODE_FOR_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
12424 { MASK_SSE2
, CODE_FOR_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
12425 { MASK_SSE2
, CODE_FOR_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
12426 { MASK_SSE2
, CODE_FOR_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
12427 { MASK_SSE2
, CODE_FOR_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
12429 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
12430 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
12431 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
12432 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
12434 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
12435 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
12436 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
12437 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
12438 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
12439 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
12440 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
12441 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
12443 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
12444 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
12445 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
12447 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
12448 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
12450 { MASK_SSE2
, CODE_FOR_ashlv8hi3_ti
, 0, IX86_BUILTIN_PSLLW128
, 0, 0 },
12451 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
12452 { MASK_SSE2
, CODE_FOR_ashlv4si3_ti
, 0, IX86_BUILTIN_PSLLD128
, 0, 0 },
12453 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
12454 { MASK_SSE2
, CODE_FOR_ashlv2di3_ti
, 0, IX86_BUILTIN_PSLLQ128
, 0, 0 },
12455 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
12457 { MASK_SSE2
, CODE_FOR_lshrv8hi3_ti
, 0, IX86_BUILTIN_PSRLW128
, 0, 0 },
12458 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
12459 { MASK_SSE2
, CODE_FOR_lshrv4si3_ti
, 0, IX86_BUILTIN_PSRLD128
, 0, 0 },
12460 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
12461 { MASK_SSE2
, CODE_FOR_lshrv2di3_ti
, 0, IX86_BUILTIN_PSRLQ128
, 0, 0 },
12462 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
12464 { MASK_SSE2
, CODE_FOR_ashrv8hi3_ti
, 0, IX86_BUILTIN_PSRAW128
, 0, 0 },
12465 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
12466 { MASK_SSE2
, CODE_FOR_ashrv4si3_ti
, 0, IX86_BUILTIN_PSRAD128
, 0, 0 },
12467 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
12469 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
12471 { MASK_SSE2
, CODE_FOR_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
12472 { MASK_SSE264
, CODE_FOR_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
12473 { MASK_SSE2
, CODE_FOR_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
12474 { MASK_SSE2
, CODE_FOR_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 }
12477 static const struct builtin_description bdesc_1arg
[] =
12479 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
12480 { MASK_SSE1
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
12482 { MASK_SSE1
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
12483 { MASK_SSE1
, CODE_FOR_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
12484 { MASK_SSE1
, CODE_FOR_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
12486 { MASK_SSE1
, CODE_FOR_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
12487 { MASK_SSE1
, CODE_FOR_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
12488 { MASK_SSE164
, CODE_FOR_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
12489 { MASK_SSE1
, CODE_FOR_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
12490 { MASK_SSE1
, CODE_FOR_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
12491 { MASK_SSE164
, CODE_FOR_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
12493 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
12494 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
12495 { MASK_SSE2
, CODE_FOR_sse2_movq2dq
, 0, IX86_BUILTIN_MOVQ2DQ
, 0, 0 },
12496 { MASK_SSE2
, CODE_FOR_sse2_movdq2q
, 0, IX86_BUILTIN_MOVDQ2Q
, 0, 0 },
12498 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
12500 { MASK_SSE2
, CODE_FOR_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
12501 { MASK_SSE2
, CODE_FOR_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
12503 { MASK_SSE2
, CODE_FOR_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
12504 { MASK_SSE2
, CODE_FOR_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
12505 { MASK_SSE2
, CODE_FOR_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
12506 { MASK_SSE2
, CODE_FOR_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
12507 { MASK_SSE2
, CODE_FOR_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
12509 { MASK_SSE2
, CODE_FOR_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
12511 { MASK_SSE2
, CODE_FOR_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
12512 { MASK_SSE2
, CODE_FOR_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
12513 { MASK_SSE264
, CODE_FOR_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
12514 { MASK_SSE264
, CODE_FOR_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
12516 { MASK_SSE2
, CODE_FOR_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
12517 { MASK_SSE2
, CODE_FOR_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
12518 { MASK_SSE2
, CODE_FOR_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
12520 { MASK_SSE2
, CODE_FOR_sse2_movq
, 0, IX86_BUILTIN_MOVQ
, 0, 0 }
/* Target hook (TARGET_INIT_BUILTINS): register all i386 builtin
   functions with the front end.  Delegates to
   ix86_init_mmx_sse_builtins, which is guarded by TARGET_MMX at the
   (elided) call site.
   NOTE(review): the function's braces and the surrounding conditional
   were lost in extraction; only these two fragments survive.  */
12524 ix86_init_builtins ()
12527 ix86_init_mmx_sse_builtins ();
12530 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
12531 is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
builtins.  */
12534 ix86_init_mmx_sse_builtins ()
12536 const struct builtin_description
* d
;
12539 tree pchar_type_node
= build_pointer_type (char_type_node
);
12540 tree pcchar_type_node
= build_pointer_type (
12541 build_type_variant (char_type_node
, 1, 0));
12542 tree pfloat_type_node
= build_pointer_type (float_type_node
);
12543 tree pcfloat_type_node
= build_pointer_type (
12544 build_type_variant (float_type_node
, 1, 0));
12545 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
12546 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
12547 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
12550 tree int_ftype_v4sf_v4sf
12551 = build_function_type_list (integer_type_node
,
12552 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12553 tree v4si_ftype_v4sf_v4sf
12554 = build_function_type_list (V4SI_type_node
,
12555 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12556 /* MMX/SSE/integer conversions. */
12557 tree int_ftype_v4sf
12558 = build_function_type_list (integer_type_node
,
12559 V4SF_type_node
, NULL_TREE
);
12560 tree int64_ftype_v4sf
12561 = build_function_type_list (long_long_integer_type_node
,
12562 V4SF_type_node
, NULL_TREE
);
12563 tree int_ftype_v8qi
12564 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
12565 tree v4sf_ftype_v4sf_int
12566 = build_function_type_list (V4SF_type_node
,
12567 V4SF_type_node
, integer_type_node
, NULL_TREE
);
12568 tree v4sf_ftype_v4sf_int64
12569 = build_function_type_list (V4SF_type_node
,
12570 V4SF_type_node
, long_long_integer_type_node
,
12572 tree v4sf_ftype_v4sf_v2si
12573 = build_function_type_list (V4SF_type_node
,
12574 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
12575 tree int_ftype_v4hi_int
12576 = build_function_type_list (integer_type_node
,
12577 V4HI_type_node
, integer_type_node
, NULL_TREE
);
12578 tree v4hi_ftype_v4hi_int_int
12579 = build_function_type_list (V4HI_type_node
, V4HI_type_node
,
12580 integer_type_node
, integer_type_node
,
12582 /* Miscellaneous. */
12583 tree v8qi_ftype_v4hi_v4hi
12584 = build_function_type_list (V8QI_type_node
,
12585 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12586 tree v4hi_ftype_v2si_v2si
12587 = build_function_type_list (V4HI_type_node
,
12588 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12589 tree v4sf_ftype_v4sf_v4sf_int
12590 = build_function_type_list (V4SF_type_node
,
12591 V4SF_type_node
, V4SF_type_node
,
12592 integer_type_node
, NULL_TREE
);
12593 tree v2si_ftype_v4hi_v4hi
12594 = build_function_type_list (V2SI_type_node
,
12595 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12596 tree v4hi_ftype_v4hi_int
12597 = build_function_type_list (V4HI_type_node
,
12598 V4HI_type_node
, integer_type_node
, NULL_TREE
);
12599 tree v4hi_ftype_v4hi_di
12600 = build_function_type_list (V4HI_type_node
,
12601 V4HI_type_node
, long_long_unsigned_type_node
,
12603 tree v2si_ftype_v2si_di
12604 = build_function_type_list (V2SI_type_node
,
12605 V2SI_type_node
, long_long_unsigned_type_node
,
12607 tree void_ftype_void
12608 = build_function_type (void_type_node
, void_list_node
);
12609 tree void_ftype_unsigned
12610 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
12611 tree unsigned_ftype_void
12612 = build_function_type (unsigned_type_node
, void_list_node
);
12614 = build_function_type (long_long_unsigned_type_node
, void_list_node
);
12615 tree v4sf_ftype_void
12616 = build_function_type (V4SF_type_node
, void_list_node
);
12617 tree v2si_ftype_v4sf
12618 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
12619 /* Loads/stores. */
12620 tree void_ftype_v8qi_v8qi_pchar
12621 = build_function_type_list (void_type_node
,
12622 V8QI_type_node
, V8QI_type_node
,
12623 pchar_type_node
, NULL_TREE
);
12624 tree v4sf_ftype_pcfloat
12625 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
12626 /* @@@ the type is bogus */
12627 tree v4sf_ftype_v4sf_pv2si
12628 = build_function_type_list (V4SF_type_node
,
12629 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
12630 tree void_ftype_pv2si_v4sf
12631 = build_function_type_list (void_type_node
,
12632 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
12633 tree void_ftype_pfloat_v4sf
12634 = build_function_type_list (void_type_node
,
12635 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
12636 tree void_ftype_pdi_di
12637 = build_function_type_list (void_type_node
,
12638 pdi_type_node
, long_long_unsigned_type_node
,
12640 tree void_ftype_pv2di_v2di
12641 = build_function_type_list (void_type_node
,
12642 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
12643 /* Normal vector unops. */
12644 tree v4sf_ftype_v4sf
12645 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12647 /* Normal vector binops. */
12648 tree v4sf_ftype_v4sf_v4sf
12649 = build_function_type_list (V4SF_type_node
,
12650 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12651 tree v8qi_ftype_v8qi_v8qi
12652 = build_function_type_list (V8QI_type_node
,
12653 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
12654 tree v4hi_ftype_v4hi_v4hi
12655 = build_function_type_list (V4HI_type_node
,
12656 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12657 tree v2si_ftype_v2si_v2si
12658 = build_function_type_list (V2SI_type_node
,
12659 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12660 tree di_ftype_di_di
12661 = build_function_type_list (long_long_unsigned_type_node
,
12662 long_long_unsigned_type_node
,
12663 long_long_unsigned_type_node
, NULL_TREE
);
12665 tree v2si_ftype_v2sf
12666 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
12667 tree v2sf_ftype_v2si
12668 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
12669 tree v2si_ftype_v2si
12670 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12671 tree v2sf_ftype_v2sf
12672 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12673 tree v2sf_ftype_v2sf_v2sf
12674 = build_function_type_list (V2SF_type_node
,
12675 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12676 tree v2si_ftype_v2sf_v2sf
12677 = build_function_type_list (V2SI_type_node
,
12678 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12679 tree pint_type_node
= build_pointer_type (integer_type_node
);
12680 tree pcint_type_node
= build_pointer_type (
12681 build_type_variant (integer_type_node
, 1, 0));
12682 tree pdouble_type_node
= build_pointer_type (double_type_node
);
12683 tree pcdouble_type_node
= build_pointer_type (
12684 build_type_variant (double_type_node
, 1, 0));
12685 tree int_ftype_v2df_v2df
12686 = build_function_type_list (integer_type_node
,
12687 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12690 = build_function_type (intTI_type_node
, void_list_node
);
12691 tree v2di_ftype_void
12692 = build_function_type (V2DI_type_node
, void_list_node
);
12693 tree ti_ftype_ti_ti
12694 = build_function_type_list (intTI_type_node
,
12695 intTI_type_node
, intTI_type_node
, NULL_TREE
);
12696 tree void_ftype_pcvoid
12697 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
12699 = build_function_type_list (V2DI_type_node
,
12700 long_long_unsigned_type_node
, NULL_TREE
);
12702 = build_function_type_list (long_long_unsigned_type_node
,
12703 V2DI_type_node
, NULL_TREE
);
12704 tree v4sf_ftype_v4si
12705 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
12706 tree v4si_ftype_v4sf
12707 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
12708 tree v2df_ftype_v4si
12709 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
12710 tree v4si_ftype_v2df
12711 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
12712 tree v2si_ftype_v2df
12713 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
12714 tree v4sf_ftype_v2df
12715 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
12716 tree v2df_ftype_v2si
12717 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
12718 tree v2df_ftype_v4sf
12719 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
12720 tree int_ftype_v2df
12721 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
12722 tree int64_ftype_v2df
12723 = build_function_type_list (long_long_integer_type_node
,
12724 V2DF_type_node
, NULL_TREE
);
12725 tree v2df_ftype_v2df_int
12726 = build_function_type_list (V2DF_type_node
,
12727 V2DF_type_node
, integer_type_node
, NULL_TREE
);
12728 tree v2df_ftype_v2df_int64
12729 = build_function_type_list (V2DF_type_node
,
12730 V2DF_type_node
, long_long_integer_type_node
,
12732 tree v4sf_ftype_v4sf_v2df
12733 = build_function_type_list (V4SF_type_node
,
12734 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
12735 tree v2df_ftype_v2df_v4sf
12736 = build_function_type_list (V2DF_type_node
,
12737 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
12738 tree v2df_ftype_v2df_v2df_int
12739 = build_function_type_list (V2DF_type_node
,
12740 V2DF_type_node
, V2DF_type_node
,
12743 tree v2df_ftype_v2df_pv2si
12744 = build_function_type_list (V2DF_type_node
,
12745 V2DF_type_node
, pv2si_type_node
, NULL_TREE
);
12746 tree void_ftype_pv2si_v2df
12747 = build_function_type_list (void_type_node
,
12748 pv2si_type_node
, V2DF_type_node
, NULL_TREE
);
12749 tree void_ftype_pdouble_v2df
12750 = build_function_type_list (void_type_node
,
12751 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
12752 tree void_ftype_pint_int
12753 = build_function_type_list (void_type_node
,
12754 pint_type_node
, integer_type_node
, NULL_TREE
);
12755 tree void_ftype_v16qi_v16qi_pchar
12756 = build_function_type_list (void_type_node
,
12757 V16QI_type_node
, V16QI_type_node
,
12758 pchar_type_node
, NULL_TREE
);
12759 tree v2df_ftype_pcdouble
12760 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
12761 tree v2df_ftype_v2df_v2df
12762 = build_function_type_list (V2DF_type_node
,
12763 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12764 tree v16qi_ftype_v16qi_v16qi
12765 = build_function_type_list (V16QI_type_node
,
12766 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
12767 tree v8hi_ftype_v8hi_v8hi
12768 = build_function_type_list (V8HI_type_node
,
12769 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
12770 tree v4si_ftype_v4si_v4si
12771 = build_function_type_list (V4SI_type_node
,
12772 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
12773 tree v2di_ftype_v2di_v2di
12774 = build_function_type_list (V2DI_type_node
,
12775 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
12776 tree v2di_ftype_v2df_v2df
12777 = build_function_type_list (V2DI_type_node
,
12778 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12779 tree v2df_ftype_v2df
12780 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12781 tree v2df_ftype_double
12782 = build_function_type_list (V2DF_type_node
, double_type_node
, NULL_TREE
);
12783 tree v2df_ftype_double_double
12784 = build_function_type_list (V2DF_type_node
,
12785 double_type_node
, double_type_node
, NULL_TREE
);
12786 tree int_ftype_v8hi_int
12787 = build_function_type_list (integer_type_node
,
12788 V8HI_type_node
, integer_type_node
, NULL_TREE
);
12789 tree v8hi_ftype_v8hi_int_int
12790 = build_function_type_list (V8HI_type_node
,
12791 V8HI_type_node
, integer_type_node
,
12792 integer_type_node
, NULL_TREE
);
12793 tree v2di_ftype_v2di_int
12794 = build_function_type_list (V2DI_type_node
,
12795 V2DI_type_node
, integer_type_node
, NULL_TREE
);
12796 tree v4si_ftype_v4si_int
12797 = build_function_type_list (V4SI_type_node
,
12798 V4SI_type_node
, integer_type_node
, NULL_TREE
);
12799 tree v8hi_ftype_v8hi_int
12800 = build_function_type_list (V8HI_type_node
,
12801 V8HI_type_node
, integer_type_node
, NULL_TREE
);
12802 tree v8hi_ftype_v8hi_v2di
12803 = build_function_type_list (V8HI_type_node
,
12804 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
12805 tree v4si_ftype_v4si_v2di
12806 = build_function_type_list (V4SI_type_node
,
12807 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
12808 tree v4si_ftype_v8hi_v8hi
12809 = build_function_type_list (V4SI_type_node
,
12810 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
12811 tree di_ftype_v8qi_v8qi
12812 = build_function_type_list (long_long_unsigned_type_node
,
12813 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
12814 tree v2di_ftype_v16qi_v16qi
12815 = build_function_type_list (V2DI_type_node
,
12816 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
12817 tree int_ftype_v16qi
12818 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
12819 tree v16qi_ftype_pcchar
12820 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
12821 tree void_ftype_pchar_v16qi
12822 = build_function_type_list (void_type_node
,
12823 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
12824 tree v4si_ftype_pcint
12825 = build_function_type_list (V4SI_type_node
, pcint_type_node
, NULL_TREE
);
12826 tree void_ftype_pcint_v4si
12827 = build_function_type_list (void_type_node
,
12828 pcint_type_node
, V4SI_type_node
, NULL_TREE
);
12829 tree v2di_ftype_v2di
12830 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
12832 /* Add all builtins that are more or less simple operations on two
12834 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
12836 /* Use one of the operands; the target can have a different mode for
12837 mask-generating compares. */
12838 enum machine_mode mode
;
12843 mode
= insn_data
[d
->icode
].operand
[1].mode
;
12848 type
= v16qi_ftype_v16qi_v16qi
;
12851 type
= v8hi_ftype_v8hi_v8hi
;
12854 type
= v4si_ftype_v4si_v4si
;
12857 type
= v2di_ftype_v2di_v2di
;
12860 type
= v2df_ftype_v2df_v2df
;
12863 type
= ti_ftype_ti_ti
;
12866 type
= v4sf_ftype_v4sf_v4sf
;
12869 type
= v8qi_ftype_v8qi_v8qi
;
12872 type
= v4hi_ftype_v4hi_v4hi
;
12875 type
= v2si_ftype_v2si_v2si
;
12878 type
= di_ftype_di_di
;
12885 /* Override for comparisons. */
12886 if (d
->icode
== CODE_FOR_maskcmpv4sf3
12887 || d
->icode
== CODE_FOR_maskncmpv4sf3
12888 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
12889 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
)
12890 type
= v4si_ftype_v4sf_v4sf
;
12892 if (d
->icode
== CODE_FOR_maskcmpv2df3
12893 || d
->icode
== CODE_FOR_maskncmpv2df3
12894 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
12895 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
12896 type
= v2di_ftype_v2df_v2df
;
12898 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
12901 /* Add the remaining MMX insns with somewhat more complicated types. */
12902 def_builtin (MASK_MMX
, "__builtin_ia32_mmx_zero", di_ftype_void
, IX86_BUILTIN_MMX_ZERO
);
12903 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
12904 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
12905 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
12906 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
12908 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
12909 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
12910 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
12912 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
12913 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
12915 def_builtin (MASK_MMX
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
12916 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
12918 /* comi/ucomi insns. */
12919 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
12920 if (d
->mask
== MASK_SSE2
)
12921 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
12923 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
12925 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
12926 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
12927 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
12929 def_builtin (MASK_SSE1
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
12930 def_builtin (MASK_SSE1
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
12931 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
12932 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
12933 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
12934 def_builtin (MASK_SSE164
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
12935 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
12936 def_builtin (MASK_SSE164
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
12937 def_builtin (MASK_SSE1
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
12938 def_builtin (MASK_SSE1
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
12939 def_builtin (MASK_SSE164
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
12941 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pextrw", int_ftype_v4hi_int
, IX86_BUILTIN_PEXTRW
);
12942 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int
, IX86_BUILTIN_PINSRW
);
12944 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
12946 def_builtin (MASK_SSE1
, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADAPS
);
12947 def_builtin (MASK_SSE1
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
12948 def_builtin (MASK_SSE1
, "__builtin_ia32_loadss", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADSS
);
12949 def_builtin (MASK_SSE1
, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREAPS
);
12950 def_builtin (MASK_SSE1
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
12951 def_builtin (MASK_SSE1
, "__builtin_ia32_storess", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORESS
);
12953 def_builtin (MASK_SSE1
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
12954 def_builtin (MASK_SSE1
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
12955 def_builtin (MASK_SSE1
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
12956 def_builtin (MASK_SSE1
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
12958 def_builtin (MASK_SSE1
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
12959 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
12960 def_builtin (MASK_SSE1
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
12961 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
12963 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
12965 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
12967 def_builtin (MASK_SSE1
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
12968 def_builtin (MASK_SSE1
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
12969 def_builtin (MASK_SSE1
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
12970 def_builtin (MASK_SSE1
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
12971 def_builtin (MASK_SSE1
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
12972 def_builtin (MASK_SSE1
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
12974 def_builtin (MASK_SSE1
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
12976 /* Original 3DNow! */
12977 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
12978 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
12979 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
12980 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
12981 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
12982 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
12983 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
12984 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
12985 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
12986 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
12987 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
12988 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
12989 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
12990 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
12991 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
12992 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
12993 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
12994 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
12995 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
12996 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
12998 /* 3DNow! extension as used in the Athlon CPU. */
12999 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
13000 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
13001 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
13002 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
13003 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
13004 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
13006 def_builtin (MASK_SSE1
, "__builtin_ia32_setzerops", v4sf_ftype_void
, IX86_BUILTIN_SSE_ZERO
);
13009 def_builtin (MASK_SSE2
, "__builtin_ia32_pextrw128", int_ftype_v8hi_int
, IX86_BUILTIN_PEXTRW128
);
13010 def_builtin (MASK_SSE2
, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int
, IX86_BUILTIN_PINSRW128
);
13012 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
13013 def_builtin (MASK_SSE2
, "__builtin_ia32_movq2dq", v2di_ftype_di
, IX86_BUILTIN_MOVQ2DQ
);
13014 def_builtin (MASK_SSE2
, "__builtin_ia32_movdq2q", di_ftype_v2di
, IX86_BUILTIN_MOVDQ2Q
);
13016 def_builtin (MASK_SSE2
, "__builtin_ia32_loadapd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADAPD
);
13017 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
13018 def_builtin (MASK_SSE2
, "__builtin_ia32_loadsd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADSD
);
13019 def_builtin (MASK_SSE2
, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREAPD
);
13020 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
13021 def_builtin (MASK_SSE2
, "__builtin_ia32_storesd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORESD
);
13023 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADHPD
);
13024 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADLPD
);
13025 def_builtin (MASK_SSE2
, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STOREHPD
);
13026 def_builtin (MASK_SSE2
, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STORELPD
);
13028 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
13029 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
13030 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
13031 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
13032 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
13034 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
13035 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
13036 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
13037 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
13039 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
13040 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
13042 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
13044 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
13045 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
13047 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
13048 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
13049 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
13050 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
13051 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
13053 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
13055 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
13056 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
13057 def_builtin (MASK_SSE264
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
13058 def_builtin (MASK_SSE264
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
13060 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
13061 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
13062 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
13064 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
13065 def_builtin (MASK_SSE264
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
13066 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
13067 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
13069 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd1", v2df_ftype_double
, IX86_BUILTIN_SETPD1
);
13070 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd", v2df_ftype_double_double
, IX86_BUILTIN_SETPD
);
13071 def_builtin (MASK_SSE2
, "__builtin_ia32_setzeropd", ti_ftype_void
, IX86_BUILTIN_CLRPD
);
13072 def_builtin (MASK_SSE2
, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADPD1
);
13073 def_builtin (MASK_SSE2
, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADRPD
);
13074 def_builtin (MASK_SSE2
, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREPD1
);
13075 def_builtin (MASK_SSE2
, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORERPD
);
13077 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
13078 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
13079 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
13081 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQA
);
13082 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
13083 def_builtin (MASK_SSE2
, "__builtin_ia32_loadd", v4si_ftype_pcint
, IX86_BUILTIN_LOADD
);
13084 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQA
);
13085 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
13086 def_builtin (MASK_SSE2
, "__builtin_ia32_stored", void_ftype_pcint_v4si
, IX86_BUILTIN_STORED
);
13087 def_builtin (MASK_SSE2
, "__builtin_ia32_movq", v2di_ftype_v2di
, IX86_BUILTIN_MOVQ
);
13089 def_builtin (MASK_SSE1
, "__builtin_ia32_setzero128", v2di_ftype_void
, IX86_BUILTIN_CLRTI
);
13091 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
13092 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
13093 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
13095 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
13096 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
13097 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
13099 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
13100 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
13102 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
13103 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
13104 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
13105 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
13107 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
13108 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
13109 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
13110 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
13112 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
13113 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
13115 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
13118 /* Errors in the source file can cause expand_expr to return const0_rtx
13119 where we expect a vector. To avoid crashing, use one of the vector
13120 clear instructions. */
13122 safe_vector_operand (x
, mode
)
13124 enum machine_mode mode
;
13126 if (x
!= const0_rtx
)
13128 x
= gen_reg_rtx (mode
);
13130 if (VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
))
13131 emit_insn (gen_mmx_clrdi (mode
== DImode
? x
13132 : gen_rtx_SUBREG (DImode
, x
, 0)));
13134 emit_insn (gen_sse_clrv4sf (mode
== V4SFmode
? x
13135 : gen_rtx_SUBREG (V4SFmode
, x
, 0)));
13139 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13142 ix86_expand_binop_builtin (icode
, arglist
, target
)
13143 enum insn_code icode
;
13148 tree arg0
= TREE_VALUE (arglist
);
13149 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13150 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13151 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13152 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
13153 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
13154 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
13156 if (VECTOR_MODE_P (mode0
))
13157 op0
= safe_vector_operand (op0
, mode0
);
13158 if (VECTOR_MODE_P (mode1
))
13159 op1
= safe_vector_operand (op1
, mode1
);
13162 || GET_MODE (target
) != tmode
13163 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13164 target
= gen_reg_rtx (tmode
);
13166 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
13168 rtx x
= gen_reg_rtx (V4SImode
);
13169 emit_insn (gen_sse2_loadd (x
, op1
));
13170 op1
= gen_lowpart (TImode
, x
);
13173 /* In case the insn wants input operands in modes different from
13174 the result, abort. */
13175 if (GET_MODE (op0
) != mode0
|| GET_MODE (op1
) != mode1
)
13178 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13179 op0
= copy_to_mode_reg (mode0
, op0
);
13180 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13181 op1
= copy_to_mode_reg (mode1
, op1
);
13183 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13184 yet one of the two must not be a memory. This is normally enforced
13185 by expanders, but we didn't bother to create one here. */
13186 if (GET_CODE (op0
) == MEM
&& GET_CODE (op1
) == MEM
)
13187 op0
= copy_to_mode_reg (mode0
, op0
);
13189 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13196 /* Subroutine of ix86_expand_builtin to take care of stores. */
13199 ix86_expand_store_builtin (icode
, arglist
)
13200 enum insn_code icode
;
13204 tree arg0
= TREE_VALUE (arglist
);
13205 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13206 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13207 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13208 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
13209 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
13211 if (VECTOR_MODE_P (mode1
))
13212 op1
= safe_vector_operand (op1
, mode1
);
13214 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13215 op1
= copy_to_mode_reg (mode1
, op1
);
13217 pat
= GEN_FCN (icode
) (op0
, op1
);
13223 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13226 ix86_expand_unop_builtin (icode
, arglist
, target
, do_load
)
13227 enum insn_code icode
;
13233 tree arg0
= TREE_VALUE (arglist
);
13234 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13235 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
13236 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
13239 || GET_MODE (target
) != tmode
13240 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13241 target
= gen_reg_rtx (tmode
);
13243 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13246 if (VECTOR_MODE_P (mode0
))
13247 op0
= safe_vector_operand (op0
, mode0
);
13249 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13250 op0
= copy_to_mode_reg (mode0
, op0
);
13253 pat
= GEN_FCN (icode
) (target
, op0
);
13260 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13261 sqrtss, rsqrtss, rcpss. */
13264 ix86_expand_unop1_builtin (icode
, arglist
, target
)
13265 enum insn_code icode
;
13270 tree arg0
= TREE_VALUE (arglist
);
13271 rtx op1
, op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13272 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
13273 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
13276 || GET_MODE (target
) != tmode
13277 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13278 target
= gen_reg_rtx (tmode
);
13280 if (VECTOR_MODE_P (mode0
))
13281 op0
= safe_vector_operand (op0
, mode0
);
13283 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13284 op0
= copy_to_mode_reg (mode0
, op0
);
13287 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
13288 op1
= copy_to_mode_reg (mode0
, op1
);
13290 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13297 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13300 ix86_expand_sse_compare (d
, arglist
, target
)
13301 const struct builtin_description
*d
;
13306 tree arg0
= TREE_VALUE (arglist
);
13307 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13308 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13309 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13311 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
13312 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
13313 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
13314 enum rtx_code comparison
= d
->comparison
;
13316 if (VECTOR_MODE_P (mode0
))
13317 op0
= safe_vector_operand (op0
, mode0
);
13318 if (VECTOR_MODE_P (mode1
))
13319 op1
= safe_vector_operand (op1
, mode1
);
13321 /* Swap operands if we have a comparison that isn't available in
13325 rtx tmp
= gen_reg_rtx (mode1
);
13326 emit_move_insn (tmp
, op1
);
13332 || GET_MODE (target
) != tmode
13333 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
13334 target
= gen_reg_rtx (tmode
);
13336 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
13337 op0
= copy_to_mode_reg (mode0
, op0
);
13338 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
13339 op1
= copy_to_mode_reg (mode1
, op1
);
13341 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
13342 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
13349 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13352 ix86_expand_sse_comi (d
, arglist
, target
)
13353 const struct builtin_description
*d
;
13358 tree arg0
= TREE_VALUE (arglist
);
13359 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13360 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13361 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13363 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
13364 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
13365 enum rtx_code comparison
= d
->comparison
;
13367 if (VECTOR_MODE_P (mode0
))
13368 op0
= safe_vector_operand (op0
, mode0
);
13369 if (VECTOR_MODE_P (mode1
))
13370 op1
= safe_vector_operand (op1
, mode1
);
13372 /* Swap operands if we have a comparison that isn't available in
13381 target
= gen_reg_rtx (SImode
);
13382 emit_move_insn (target
, const0_rtx
);
13383 target
= gen_rtx_SUBREG (QImode
, target
, 0);
13385 if (! (*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
13386 op0
= copy_to_mode_reg (mode0
, op0
);
13387 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
13388 op1
= copy_to_mode_reg (mode1
, op1
);
13390 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
13391 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
13395 emit_insn (gen_rtx_SET (VOIDmode
,
13396 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
13397 gen_rtx_fmt_ee (comparison
, QImode
,
13401 return SUBREG_REG (target
);
13404 /* Expand an expression EXP that calls a built-in function,
13405 with result going to TARGET if that's convenient
13406 (and in mode MODE if that's convenient).
13407 SUBTARGET may be used as the target for computing one of EXP's operands.
13408 IGNORE is nonzero if the value is to be ignored. */
13411 ix86_expand_builtin (exp
, target
, subtarget
, mode
, ignore
)
13414 rtx subtarget ATTRIBUTE_UNUSED
;
13415 enum machine_mode mode ATTRIBUTE_UNUSED
;
13416 int ignore ATTRIBUTE_UNUSED
;
13418 const struct builtin_description
*d
;
13420 enum insn_code icode
;
13421 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
13422 tree arglist
= TREE_OPERAND (exp
, 1);
13423 tree arg0
, arg1
, arg2
;
13424 rtx op0
, op1
, op2
, pat
;
13425 enum machine_mode tmode
, mode0
, mode1
, mode2
;
13426 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
13430 case IX86_BUILTIN_EMMS
:
13431 emit_insn (gen_emms ());
13434 case IX86_BUILTIN_SFENCE
:
13435 emit_insn (gen_sfence ());
13438 case IX86_BUILTIN_PEXTRW
:
13439 case IX86_BUILTIN_PEXTRW128
:
13440 icode
= (fcode
== IX86_BUILTIN_PEXTRW
13441 ? CODE_FOR_mmx_pextrw
13442 : CODE_FOR_sse2_pextrw
);
13443 arg0
= TREE_VALUE (arglist
);
13444 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13445 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13446 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13447 tmode
= insn_data
[icode
].operand
[0].mode
;
13448 mode0
= insn_data
[icode
].operand
[1].mode
;
13449 mode1
= insn_data
[icode
].operand
[2].mode
;
13451 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13452 op0
= copy_to_mode_reg (mode0
, op0
);
13453 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13455 /* @@@ better error message */
13456 error ("selector must be an immediate");
13457 return gen_reg_rtx (tmode
);
13460 || GET_MODE (target
) != tmode
13461 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13462 target
= gen_reg_rtx (tmode
);
13463 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13469 case IX86_BUILTIN_PINSRW
:
13470 case IX86_BUILTIN_PINSRW128
:
13471 icode
= (fcode
== IX86_BUILTIN_PINSRW
13472 ? CODE_FOR_mmx_pinsrw
13473 : CODE_FOR_sse2_pinsrw
);
13474 arg0
= TREE_VALUE (arglist
);
13475 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13476 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13477 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13478 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13479 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13480 tmode
= insn_data
[icode
].operand
[0].mode
;
13481 mode0
= insn_data
[icode
].operand
[1].mode
;
13482 mode1
= insn_data
[icode
].operand
[2].mode
;
13483 mode2
= insn_data
[icode
].operand
[3].mode
;
13485 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13486 op0
= copy_to_mode_reg (mode0
, op0
);
13487 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13488 op1
= copy_to_mode_reg (mode1
, op1
);
13489 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
13491 /* @@@ better error message */
13492 error ("selector must be an immediate");
13496 || GET_MODE (target
) != tmode
13497 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13498 target
= gen_reg_rtx (tmode
);
13499 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
13505 case IX86_BUILTIN_MASKMOVQ
:
13506 case IX86_BUILTIN_MASKMOVDQU
:
13507 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
13508 ? (TARGET_64BIT
? CODE_FOR_mmx_maskmovq_rex
: CODE_FOR_mmx_maskmovq
)
13509 : (TARGET_64BIT
? CODE_FOR_sse2_maskmovdqu_rex64
13510 : CODE_FOR_sse2_maskmovdqu
));
13511 /* Note the arg order is different from the operand order. */
13512 arg1
= TREE_VALUE (arglist
);
13513 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
13514 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13515 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13516 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13517 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13518 mode0
= insn_data
[icode
].operand
[0].mode
;
13519 mode1
= insn_data
[icode
].operand
[1].mode
;
13520 mode2
= insn_data
[icode
].operand
[2].mode
;
13522 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
13523 op0
= copy_to_mode_reg (mode0
, op0
);
13524 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
13525 op1
= copy_to_mode_reg (mode1
, op1
);
13526 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
13527 op2
= copy_to_mode_reg (mode2
, op2
);
13528 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
13534 case IX86_BUILTIN_SQRTSS
:
13535 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2
, arglist
, target
);
13536 case IX86_BUILTIN_RSQRTSS
:
13537 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2
, arglist
, target
);
13538 case IX86_BUILTIN_RCPSS
:
13539 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2
, arglist
, target
);
13541 case IX86_BUILTIN_LOADAPS
:
13542 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps
, arglist
, target
, 1);
13544 case IX86_BUILTIN_LOADUPS
:
13545 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
13547 case IX86_BUILTIN_STOREAPS
:
13548 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
);
13550 case IX86_BUILTIN_STOREUPS
:
13551 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
13553 case IX86_BUILTIN_LOADSS
:
13554 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss
, arglist
, target
, 1);
13556 case IX86_BUILTIN_STORESS
:
13557 return ix86_expand_store_builtin (CODE_FOR_sse_storess
, arglist
);
13559 case IX86_BUILTIN_LOADHPS
:
13560 case IX86_BUILTIN_LOADLPS
:
13561 case IX86_BUILTIN_LOADHPD
:
13562 case IX86_BUILTIN_LOADLPD
:
13563 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_movhps
13564 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_movlps
13565 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_movhpd
13566 : CODE_FOR_sse2_movlpd
);
13567 arg0
= TREE_VALUE (arglist
);
13568 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13569 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13570 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13571 tmode
= insn_data
[icode
].operand
[0].mode
;
13572 mode0
= insn_data
[icode
].operand
[1].mode
;
13573 mode1
= insn_data
[icode
].operand
[2].mode
;
13575 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13576 op0
= copy_to_mode_reg (mode0
, op0
);
13577 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
13579 || GET_MODE (target
) != tmode
13580 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13581 target
= gen_reg_rtx (tmode
);
13582 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13588 case IX86_BUILTIN_STOREHPS
:
13589 case IX86_BUILTIN_STORELPS
:
13590 case IX86_BUILTIN_STOREHPD
:
13591 case IX86_BUILTIN_STORELPD
:
13592 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_movhps
13593 : fcode
== IX86_BUILTIN_STORELPS
? CODE_FOR_sse_movlps
13594 : fcode
== IX86_BUILTIN_STOREHPD
? CODE_FOR_sse2_movhpd
13595 : CODE_FOR_sse2_movlpd
);
13596 arg0
= TREE_VALUE (arglist
);
13597 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13598 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13599 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13600 mode0
= insn_data
[icode
].operand
[1].mode
;
13601 mode1
= insn_data
[icode
].operand
[2].mode
;
13603 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13604 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13605 op1
= copy_to_mode_reg (mode1
, op1
);
13607 pat
= GEN_FCN (icode
) (op0
, op0
, op1
);
13613 case IX86_BUILTIN_MOVNTPS
:
13614 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
13615 case IX86_BUILTIN_MOVNTQ
:
13616 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
13618 case IX86_BUILTIN_LDMXCSR
:
13619 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
13620 target
= assign_386_stack_local (SImode
, 0);
13621 emit_move_insn (target
, op0
);
13622 emit_insn (gen_ldmxcsr (target
));
13625 case IX86_BUILTIN_STMXCSR
:
13626 target
= assign_386_stack_local (SImode
, 0);
13627 emit_insn (gen_stmxcsr (target
));
13628 return copy_to_mode_reg (SImode
, target
);
13630 case IX86_BUILTIN_SHUFPS
:
13631 case IX86_BUILTIN_SHUFPD
:
13632 icode
= (fcode
== IX86_BUILTIN_SHUFPS
13633 ? CODE_FOR_sse_shufps
13634 : CODE_FOR_sse2_shufpd
);
13635 arg0
= TREE_VALUE (arglist
);
13636 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13637 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13638 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13639 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13640 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13641 tmode
= insn_data
[icode
].operand
[0].mode
;
13642 mode0
= insn_data
[icode
].operand
[1].mode
;
13643 mode1
= insn_data
[icode
].operand
[2].mode
;
13644 mode2
= insn_data
[icode
].operand
[3].mode
;
13646 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13647 op0
= copy_to_mode_reg (mode0
, op0
);
13648 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13649 op1
= copy_to_mode_reg (mode1
, op1
);
13650 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
13652 /* @@@ better error message */
13653 error ("mask must be an immediate");
13654 return gen_reg_rtx (tmode
);
13657 || GET_MODE (target
) != tmode
13658 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13659 target
= gen_reg_rtx (tmode
);
13660 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
13666 case IX86_BUILTIN_PSHUFW
:
13667 case IX86_BUILTIN_PSHUFD
:
13668 case IX86_BUILTIN_PSHUFHW
:
13669 case IX86_BUILTIN_PSHUFLW
:
13670 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
13671 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
13672 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
13673 : CODE_FOR_mmx_pshufw
);
13674 arg0
= TREE_VALUE (arglist
);
13675 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13676 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13677 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13678 tmode
= insn_data
[icode
].operand
[0].mode
;
13679 mode1
= insn_data
[icode
].operand
[1].mode
;
13680 mode2
= insn_data
[icode
].operand
[2].mode
;
13682 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
13683 op0
= copy_to_mode_reg (mode1
, op0
);
13684 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
13686 /* @@@ better error message */
13687 error ("mask must be an immediate");
13691 || GET_MODE (target
) != tmode
13692 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13693 target
= gen_reg_rtx (tmode
);
13694 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13700 case IX86_BUILTIN_PSLLDQI128
:
13701 case IX86_BUILTIN_PSRLDQI128
:
13702 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
13703 : CODE_FOR_sse2_lshrti3
);
13704 arg0
= TREE_VALUE (arglist
);
13705 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13706 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13707 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13708 tmode
= insn_data
[icode
].operand
[0].mode
;
13709 mode1
= insn_data
[icode
].operand
[1].mode
;
13710 mode2
= insn_data
[icode
].operand
[2].mode
;
13712 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
13714 op0
= copy_to_reg (op0
);
13715 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
13717 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
13719 error ("shift must be an immediate");
13722 target
= gen_reg_rtx (V2DImode
);
13723 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
13729 case IX86_BUILTIN_FEMMS
:
13730 emit_insn (gen_femms ());
13733 case IX86_BUILTIN_PAVGUSB
:
13734 return ix86_expand_binop_builtin (CODE_FOR_pavgusb
, arglist
, target
);
13736 case IX86_BUILTIN_PF2ID
:
13737 return ix86_expand_unop_builtin (CODE_FOR_pf2id
, arglist
, target
, 0);
13739 case IX86_BUILTIN_PFACC
:
13740 return ix86_expand_binop_builtin (CODE_FOR_pfacc
, arglist
, target
);
13742 case IX86_BUILTIN_PFADD
:
13743 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3
, arglist
, target
);
13745 case IX86_BUILTIN_PFCMPEQ
:
13746 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3
, arglist
, target
);
13748 case IX86_BUILTIN_PFCMPGE
:
13749 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3
, arglist
, target
);
13751 case IX86_BUILTIN_PFCMPGT
:
13752 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3
, arglist
, target
);
13754 case IX86_BUILTIN_PFMAX
:
13755 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3
, arglist
, target
);
13757 case IX86_BUILTIN_PFMIN
:
13758 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3
, arglist
, target
);
13760 case IX86_BUILTIN_PFMUL
:
13761 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3
, arglist
, target
);
13763 case IX86_BUILTIN_PFRCP
:
13764 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2
, arglist
, target
, 0);
13766 case IX86_BUILTIN_PFRCPIT1
:
13767 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3
, arglist
, target
);
13769 case IX86_BUILTIN_PFRCPIT2
:
13770 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3
, arglist
, target
);
13772 case IX86_BUILTIN_PFRSQIT1
:
13773 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3
, arglist
, target
);
13775 case IX86_BUILTIN_PFRSQRT
:
13776 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2
, arglist
, target
, 0);
13778 case IX86_BUILTIN_PFSUB
:
13779 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3
, arglist
, target
);
13781 case IX86_BUILTIN_PFSUBR
:
13782 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3
, arglist
, target
);
13784 case IX86_BUILTIN_PI2FD
:
13785 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2
, arglist
, target
, 0);
13787 case IX86_BUILTIN_PMULHRW
:
13788 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3
, arglist
, target
);
13790 case IX86_BUILTIN_PF2IW
:
13791 return ix86_expand_unop_builtin (CODE_FOR_pf2iw
, arglist
, target
, 0);
13793 case IX86_BUILTIN_PFNACC
:
13794 return ix86_expand_binop_builtin (CODE_FOR_pfnacc
, arglist
, target
);
13796 case IX86_BUILTIN_PFPNACC
:
13797 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc
, arglist
, target
);
13799 case IX86_BUILTIN_PI2FW
:
13800 return ix86_expand_unop_builtin (CODE_FOR_pi2fw
, arglist
, target
, 0);
13802 case IX86_BUILTIN_PSWAPDSI
:
13803 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2
, arglist
, target
, 0);
13805 case IX86_BUILTIN_PSWAPDSF
:
13806 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2
, arglist
, target
, 0);
13808 case IX86_BUILTIN_SSE_ZERO
:
13809 target
= gen_reg_rtx (V4SFmode
);
13810 emit_insn (gen_sse_clrv4sf (target
));
13813 case IX86_BUILTIN_MMX_ZERO
:
13814 target
= gen_reg_rtx (DImode
);
13815 emit_insn (gen_mmx_clrdi (target
));
13818 case IX86_BUILTIN_CLRTI
:
13819 target
= gen_reg_rtx (V2DImode
);
13820 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode
, target
, V2DImode
, 0)));
13824 case IX86_BUILTIN_SQRTSD
:
13825 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2
, arglist
, target
);
13826 case IX86_BUILTIN_LOADAPD
:
13827 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
, target
, 1);
13828 case IX86_BUILTIN_LOADUPD
:
13829 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
13831 case IX86_BUILTIN_STOREAPD
:
13832 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13833 case IX86_BUILTIN_STOREUPD
:
13834 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
13836 case IX86_BUILTIN_LOADSD
:
13837 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
, target
, 1);
13839 case IX86_BUILTIN_STORESD
:
13840 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd
, arglist
);
13842 case IX86_BUILTIN_SETPD1
:
13843 target
= assign_386_stack_local (DFmode
, 0);
13844 arg0
= TREE_VALUE (arglist
);
13845 emit_move_insn (adjust_address (target
, DFmode
, 0),
13846 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
13847 op0
= gen_reg_rtx (V2DFmode
);
13848 emit_insn (gen_sse2_loadsd (op0
, adjust_address (target
, V2DFmode
, 0)));
13849 emit_insn (gen_sse2_shufpd (op0
, op0
, op0
, GEN_INT (0)));
13852 case IX86_BUILTIN_SETPD
:
13853 target
= assign_386_stack_local (V2DFmode
, 0);
13854 arg0
= TREE_VALUE (arglist
);
13855 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13856 emit_move_insn (adjust_address (target
, DFmode
, 0),
13857 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
13858 emit_move_insn (adjust_address (target
, DFmode
, 8),
13859 expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0));
13860 op0
= gen_reg_rtx (V2DFmode
);
13861 emit_insn (gen_sse2_movapd (op0
, target
));
13864 case IX86_BUILTIN_LOADRPD
:
13865 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
,
13866 gen_reg_rtx (V2DFmode
), 1);
13867 emit_insn (gen_sse2_shufpd (target
, target
, target
, GEN_INT (1)));
13870 case IX86_BUILTIN_LOADPD1
:
13871 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
,
13872 gen_reg_rtx (V2DFmode
), 1);
13873 emit_insn (gen_sse2_shufpd (target
, target
, target
, const0_rtx
));
13876 case IX86_BUILTIN_STOREPD1
:
13877 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13878 case IX86_BUILTIN_STORERPD
:
13879 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13881 case IX86_BUILTIN_CLRPD
:
13882 target
= gen_reg_rtx (V2DFmode
);
13883 emit_insn (gen_sse_clrv2df (target
));
13886 case IX86_BUILTIN_MFENCE
:
13887 emit_insn (gen_sse2_mfence ());
13889 case IX86_BUILTIN_LFENCE
:
13890 emit_insn (gen_sse2_lfence ());
13893 case IX86_BUILTIN_CLFLUSH
:
13894 arg0
= TREE_VALUE (arglist
);
13895 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13896 icode
= CODE_FOR_sse2_clflush
;
13897 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
13898 op0
= copy_to_mode_reg (Pmode
, op0
);
13900 emit_insn (gen_sse2_clflush (op0
));
13903 case IX86_BUILTIN_MOVNTPD
:
13904 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
13905 case IX86_BUILTIN_MOVNTDQ
:
13906 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
13907 case IX86_BUILTIN_MOVNTI
:
13908 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
13910 case IX86_BUILTIN_LOADDQA
:
13911 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa
, arglist
, target
, 1);
13912 case IX86_BUILTIN_LOADDQU
:
13913 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, arglist
, target
, 1);
13914 case IX86_BUILTIN_LOADD
:
13915 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd
, arglist
, target
, 1);
13917 case IX86_BUILTIN_STOREDQA
:
13918 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa
, arglist
);
13919 case IX86_BUILTIN_STOREDQU
:
13920 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, arglist
);
13921 case IX86_BUILTIN_STORED
:
13922 return ix86_expand_store_builtin (CODE_FOR_sse2_stored
, arglist
);
13928 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
13929 if (d
->code
== fcode
)
13931 /* Compares are treated specially. */
13932 if (d
->icode
== CODE_FOR_maskcmpv4sf3
13933 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
13934 || d
->icode
== CODE_FOR_maskncmpv4sf3
13935 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
13936 || d
->icode
== CODE_FOR_maskcmpv2df3
13937 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
13938 || d
->icode
== CODE_FOR_maskncmpv2df3
13939 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
13940 return ix86_expand_sse_compare (d
, arglist
, target
);
13942 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
13945 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
13946 if (d
->code
== fcode
)
13947 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
13949 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
13950 if (d
->code
== fcode
)
13951 return ix86_expand_sse_comi (d
, arglist
, target
);
13953 /* @@@ Should really do something sensible here. */
13957 /* Store OPERAND to the memory after reload is completed. This means
13958 that we can't easily use assign_stack_local. */
13960 ix86_force_to_memory (mode
, operand
)
13961 enum machine_mode mode
;
13965 if (!reload_completed
)
13967 if (TARGET_64BIT
&& TARGET_RED_ZONE
)
13969 result
= gen_rtx_MEM (mode
,
13970 gen_rtx_PLUS (Pmode
,
13972 GEN_INT (-RED_ZONE_SIZE
)));
13973 emit_move_insn (result
, operand
);
13975 else if (TARGET_64BIT
&& !TARGET_RED_ZONE
)
13981 operand
= gen_lowpart (DImode
, operand
);
13985 gen_rtx_SET (VOIDmode
,
13986 gen_rtx_MEM (DImode
,
13987 gen_rtx_PRE_DEC (DImode
,
13988 stack_pointer_rtx
)),
13994 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
14003 split_di (&operand
, 1, operands
, operands
+ 1);
14005 gen_rtx_SET (VOIDmode
,
14006 gen_rtx_MEM (SImode
,
14007 gen_rtx_PRE_DEC (Pmode
,
14008 stack_pointer_rtx
)),
14011 gen_rtx_SET (VOIDmode
,
14012 gen_rtx_MEM (SImode
,
14013 gen_rtx_PRE_DEC (Pmode
,
14014 stack_pointer_rtx
)),
14019 /* It is better to store HImodes as SImodes. */
14020 if (!TARGET_PARTIAL_REG_STALL
)
14021 operand
= gen_lowpart (SImode
, operand
);
14025 gen_rtx_SET (VOIDmode
,
14026 gen_rtx_MEM (GET_MODE (operand
),
14027 gen_rtx_PRE_DEC (SImode
,
14028 stack_pointer_rtx
)),
14034 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
14039 /* Free operand from the memory. */
14041 ix86_free_from_memory (mode
)
14042 enum machine_mode mode
;
14044 if (!TARGET_64BIT
|| !TARGET_RED_ZONE
)
14048 if (mode
== DImode
|| TARGET_64BIT
)
14050 else if (mode
== HImode
&& TARGET_PARTIAL_REG_STALL
)
14054 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14055 to pop or add instruction if registers are available. */
14056 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
14057 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
14062 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14063 QImode must go into class Q_REGS.
14064 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14065 movdf to do mem-to-mem moves through integer regs. */
14067 ix86_preferred_reload_class (x
, class)
14069 enum reg_class
class;
14071 if (GET_CODE (x
) == CONST_VECTOR
&& x
!= CONST0_RTX (GET_MODE (x
)))
14073 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
14075 /* SSE can't load any constant directly yet. */
14076 if (SSE_CLASS_P (class))
14078 /* Floats can load 0 and 1. */
14079 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x
))
14081 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14082 if (MAYBE_SSE_CLASS_P (class))
14083 return (reg_class_subset_p (class, GENERAL_REGS
)
14084 ? GENERAL_REGS
: FLOAT_REGS
);
14088 /* General regs can load everything. */
14089 if (reg_class_subset_p (class, GENERAL_REGS
))
14090 return GENERAL_REGS
;
14091 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14092 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14095 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x
))
14097 if (GET_MODE (x
) == QImode
&& ! reg_class_subset_p (class, Q_REGS
))
14102 /* If we are copying between general and FP registers, we need a memory
14103 location. The same is true for SSE and MMX registers.
14105 The macro can't work reliably when one of the CLASSES is class containing
14106 registers from multiple units (SSE, MMX, integer). We avoid this by never
14107 combining those units in single alternative in the machine description.
14108 Ensure that this constraint holds to avoid unexpected surprises.
14110 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14111 enforce these sanity checks. */
14113 ix86_secondary_memory_needed (class1
, class2
, mode
, strict
)
14114 enum reg_class class1
, class2
;
14115 enum machine_mode mode
;
14118 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
14119 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
14120 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
14121 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
14122 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
14123 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
14130 return (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
)
14131 || (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
)
14132 && (mode
) != SImode
)
14133 || (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
14134 && (mode
) != SImode
));
14136 /* Return the cost of moving data from a register in class CLASS1 to
14137 one in class CLASS2.
14139 It is not required that the cost always equal 2 when FROM is the same as TO;
14140 on some machines it is expensive to move between registers if they are not
14141 general registers. */
14143 ix86_register_move_cost (mode
, class1
, class2
)
14144 enum machine_mode mode
;
14145 enum reg_class class1
, class2
;
14147 /* In case we require secondary memory, compute cost of the store followed
14148 by load. In order to avoid bad register allocation choices, we need
14149 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14151 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
14155 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
14156 MEMORY_MOVE_COST (mode
, class1
, 1));
14157 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
14158 MEMORY_MOVE_COST (mode
, class2
, 1));
14160 /* In case of copying from general_purpose_register we may emit multiple
14161 stores followed by single load causing memory size mismatch stall.
14162 Count this as arbitarily high cost of 20. */
14163 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
14166 /* In the case of FP/MMX moves, the registers actually overlap, and we
14167 have to switch modes in order to treat them differently. */
14168 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
14169 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
14175 /* Moves between SSE/MMX and integer unit are expensive. */
14176 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
14177 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
14178 return ix86_cost
->mmxsse_to_integer
;
14179 if (MAYBE_FLOAT_CLASS_P (class1
))
14180 return ix86_cost
->fp_move
;
14181 if (MAYBE_SSE_CLASS_P (class1
))
14182 return ix86_cost
->sse_move
;
14183 if (MAYBE_MMX_CLASS_P (class1
))
14184 return ix86_cost
->mmx_move
;
14188 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14190 ix86_hard_regno_mode_ok (regno
, mode
)
14192 enum machine_mode mode
;
14194 /* Flags and only flags can only hold CCmode values. */
14195 if (CC_REGNO_P (regno
))
14196 return GET_MODE_CLASS (mode
) == MODE_CC
;
14197 if (GET_MODE_CLASS (mode
) == MODE_CC
14198 || GET_MODE_CLASS (mode
) == MODE_RANDOM
14199 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
14201 if (FP_REGNO_P (regno
))
14202 return VALID_FP_MODE_P (mode
);
14203 if (SSE_REGNO_P (regno
))
14204 return (TARGET_SSE
? VALID_SSE_REG_MODE (mode
) : 0);
14205 if (MMX_REGNO_P (regno
))
14207 ? VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
) : 0);
14208 /* We handle both integer and floats in the general purpose registers.
14209 In future we should be able to handle vector modes as well. */
14210 if (!VALID_INT_MODE_P (mode
) && !VALID_FP_MODE_P (mode
))
14212 /* Take care for QImode values - they can be in non-QI regs, but then
14213 they do cause partial register stalls. */
14214 if (regno
< 4 || mode
!= QImode
|| TARGET_64BIT
)
14216 return reload_in_progress
|| reload_completed
|| !TARGET_PARTIAL_REG_STALL
;
14219 /* Return the cost of moving data of mode M between a
14220 register and memory. A value of 2 is the default; this cost is
14221 relative to those in `REGISTER_MOVE_COST'.
14223 If moving between registers and memory is more expensive than
14224 between two registers, you should define this macro to express the
14227 Model also increased moving costs of QImode registers in non
14231 ix86_memory_move_cost (mode
, class, in
)
14232 enum machine_mode mode
;
14233 enum reg_class
class;
14236 if (FLOAT_CLASS_P (class))
14254 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
14256 if (SSE_CLASS_P (class))
14259 switch (GET_MODE_SIZE (mode
))
14273 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
14275 if (MMX_CLASS_P (class))
14278 switch (GET_MODE_SIZE (mode
))
14289 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
14291 switch (GET_MODE_SIZE (mode
))
14295 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
14296 : ix86_cost
->movzbl_load
);
14298 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
14299 : ix86_cost
->int_store
[0] + 4);
14302 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
14304 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14305 if (mode
== TFmode
)
14307 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
14308 * ((int) GET_MODE_SIZE (mode
)
14309 + UNITS_PER_WORD
-1 ) / UNITS_PER_WORD
);
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* Emit an SVR3-style constructor record: push the address of SYMBOL in
   the init section; PRIORITY is ignored on this target.  */
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif
#if TARGET_MACHO
/* Counter used to generate unique local labels for stubs.  */
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (file, symb, stub)
     FILE *file;
     const char *symb, *stub;
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      /* PIC stub: compute the lazy pointer address relative to a
         call-popped PC value.  */
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  /* Emit the lazy pointer itself, initially pointing at the binder.  */
  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */
14391 /* Order the registers for register allocator. */
14394 x86_order_regs_for_local_alloc ()
14399 /* First allocate the local general purpose registers. */
14400 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
14401 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
14402 reg_alloc_order
[pos
++] = i
;
14404 /* Global general purpose registers. */
14405 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
14406 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
14407 reg_alloc_order
[pos
++] = i
;
14409 /* x87 registers come first in case we are doing FP math
14411 if (!TARGET_SSE_MATH
)
14412 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
14413 reg_alloc_order
[pos
++] = i
;
14415 /* SSE registers. */
14416 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
14417 reg_alloc_order
[pos
++] = i
;
14418 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
14419 reg_alloc_order
[pos
++] = i
;
14421 /* x87 registerts. */
14422 if (TARGET_SSE_MATH
)
14423 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
14424 reg_alloc_order
[pos
++] = i
;
14426 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
14427 reg_alloc_order
[pos
++] = i
;
14429 /* Initialize the rest of array as we do not allocate some registers
14431 while (pos
< FIRST_PSEUDO_REGISTER
)
14432 reg_alloc_order
[pos
++] = 0;
14435 /* Returns an expression indicating where the this parameter is
14436 located on entry to the FUNCTION. */
14439 x86_this_parameter (function
)
14442 tree type
= TREE_TYPE (function
);
14446 int n
= aggregate_value_p (TREE_TYPE (type
)) != 0;
14447 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
14450 if (ix86_fntype_regparm (type
) > 0)
14454 parm
= TYPE_ARG_TYPES (type
);
14455 /* Figure out whether or not the function has a variable number of
14457 for (; parm
; parm
= TREE_CHAIN (parm
))
14458 if (TREE_VALUE (parm
) == void_type_node
)
14460 /* If not, the this parameter is in %eax. */
14462 return gen_rtx_REG (SImode
, 0);
14465 if (aggregate_value_p (TREE_TYPE (type
)))
14466 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
14468 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
14471 /* Determine whether x86_output_mi_thunk can succeed. */
14474 x86_can_output_mi_thunk (thunk
, delta
, vcall_offset
, function
)
14475 tree thunk ATTRIBUTE_UNUSED
;
14476 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
;
14477 HOST_WIDE_INT vcall_offset
;
14480 /* 64-bit can handle anything. */
14484 /* For 32-bit, everything's fine if we have one free register. */
14485 if (ix86_fntype_regparm (TREE_TYPE (function
)) < 3)
14488 /* Need a free register for vcall_offset. */
14492 /* Need a free register for GOT references. */
14493 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
14496 /* Otherwise ok. */
14500 /* Output the assembler code for a thunk function. THUNK_DECL is the
14501 declaration for the thunk function itself, FUNCTION is the decl for
14502 the target function. DELTA is an immediate constant offset to be
14503 added to THIS. If VCALL_OFFSET is non-zero, the word at
14504 *(*this + vcall_offset) should be added to THIS. */
14507 x86_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
)
14508 FILE *file ATTRIBUTE_UNUSED
;
14509 tree thunk ATTRIBUTE_UNUSED
;
14510 HOST_WIDE_INT delta
;
14511 HOST_WIDE_INT vcall_offset
;
14515 rtx
this = x86_this_parameter (function
);
14518 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14519 pull it in now and let DELTA benefit. */
14522 else if (vcall_offset
)
14524 /* Put the this parameter into %eax. */
14526 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
14527 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
14530 this_reg
= NULL_RTX
;
14532 /* Adjust the this parameter by a fixed constant. */
14535 xops
[0] = GEN_INT (delta
);
14536 xops
[1] = this_reg
? this_reg
: this;
14539 if (!x86_64_general_operand (xops
[0], DImode
))
14541 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
14543 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
14547 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
14550 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
14553 /* Adjust the this parameter by a value stored in the vtable. */
14557 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
14559 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
14561 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
14564 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
14566 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
14568 /* Adjust the this parameter. */
14569 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
14570 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
14572 rtx tmp2
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
14573 xops
[0] = GEN_INT (vcall_offset
);
14575 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
14576 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
14578 xops
[1] = this_reg
;
14580 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
14582 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
14585 /* If necessary, drop THIS back to its stack slot. */
14586 if (this_reg
&& this_reg
!= this)
14588 xops
[0] = this_reg
;
14590 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
14593 xops
[0] = DECL_RTL (function
);
14596 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
14597 output_asm_insn ("jmp\t%P0", xops
);
14600 tmp
= XEXP (xops
[0], 0);
14601 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, tmp
), UNSPEC_GOTPCREL
);
14602 tmp
= gen_rtx_CONST (Pmode
, tmp
);
14603 tmp
= gen_rtx_MEM (QImode
, tmp
);
14605 output_asm_insn ("jmp\t%A0", xops
);
14610 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
14611 output_asm_insn ("jmp\t%P0", xops
);
14614 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
14615 output_set_got (tmp
);
14618 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
14619 output_asm_insn ("jmp\t{*}%1", xops
);
14624 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14625 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14629 ix86_ms_bitfield_layout_p (record_type
)
14630 tree record_type ATTRIBUTE_UNUSED
;
14632 return TARGET_USE_MS_BITFIELD_LAYOUT
;
14636 x86_field_alignment (field
, computed
)
14640 enum machine_mode mode
;
14641 tree type
= TREE_TYPE (field
);
14643 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
14645 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
14646 ? get_inner_array_type (type
) : type
);
14647 if (mode
== DFmode
|| mode
== DCmode
14648 || GET_MODE_CLASS (mode
) == MODE_INT
14649 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
14650 return MIN (32, computed
);
14654 /* Output assembler code to FILE to increment profiler label # LABELNO
14655 for profiling a function entry. */
14657 x86_function_profiler (file
, labelno
)
14664 #ifndef NO_PROFILE_COUNTERS
14665 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
14667 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
14671 #ifndef NO_PROFILE_COUNTERS
14672 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
14674 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
14678 #ifndef NO_PROFILE_COUNTERS
14679 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14680 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
14682 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
14686 #ifndef NO_PROFILE_COUNTERS
14687 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
14688 PROFILE_COUNT_REGISTER
);
14690 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
14694 /* Implement machine specific optimizations.
14695 At the moment we implement single transformation: AMD Athlon works faster
14696 when RET is not destination of conditional jump or directly preceeded
14697 by other jump instruction. We avoid the penalty by inserting NOP just
14698 before the RET instructions in such cases. */
14700 x86_machine_dependent_reorg (first
)
14701 rtx first ATTRIBUTE_UNUSED
;
14705 if (!TARGET_ATHLON
|| !optimize
|| optimize_size
)
14707 for (e
= EXIT_BLOCK_PTR
->pred
; e
; e
= e
->pred_next
)
14709 basic_block bb
= e
->src
;
14712 bool insert
= false;
14714 if (!returnjump_p (ret
) || !maybe_hot_bb_p (bb
))
14716 prev
= prev_nonnote_insn (ret
);
14717 if (prev
&& GET_CODE (prev
) == CODE_LABEL
)
14720 for (e
= bb
->pred
; e
; e
= e
->pred_next
)
14721 if (EDGE_FREQUENCY (e
) && e
->src
->index
> 0
14722 && !(e
->flags
& EDGE_FALLTHRU
))
14727 prev
= prev_real_insn (ret
);
14728 if (prev
&& GET_CODE (prev
) == JUMP_INSN
14729 && any_condjump_p (prev
))
14733 emit_insn_before (gen_nop (), ret
);
14737 #include "gt-i386.h"